sys/dev/ena/ena.c (FreeBSD/FreeBSD.git, at commit "ena: Add sysctl support for spreading IRQs")
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2021 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 #include <sys/cdefs.h>
31 #include "opt_rss.h"
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/bus.h>
36 #include <sys/endian.h>
37 #include <sys/eventhandler.h>
38 #include <sys/kernel.h>
39 #include <sys/kthread.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/module.h>
43 #include <sys/rman.h>
44 #include <sys/smp.h>
45 #include <sys/socket.h>
46 #include <sys/sockio.h>
47 #include <sys/sysctl.h>
48 #include <sys/taskqueue.h>
49 #include <sys/time.h>
50
51 #include <vm/vm.h>
52 #include <vm/pmap.h>
53
54 #include <machine/atomic.h>
55 #include <machine/bus.h>
56 #include <machine/in_cksum.h>
57 #include <machine/resource.h>
58
59 #include <dev/pci/pcireg.h>
60 #include <dev/pci/pcivar.h>
61
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/if_arp.h>
66 #include <net/if_dl.h>
67 #include <net/if_media.h>
68 #include <net/if_types.h>
69 #include <net/if_var.h>
70 #include <net/if_vlan_var.h>
71 #include <netinet/in.h>
72 #include <netinet/in_systm.h>
73 #include <netinet/if_ether.h>
74 #include <netinet/ip.h>
75 #include <netinet/ip6.h>
76 #include <netinet/tcp.h>
77 #include <netinet/udp.h>
78
79 #include "ena.h"
80 #include "ena_datapath.h"
81 #include "ena_rss.h"
82 #include "ena_sysctl.h"
83
84 #ifdef DEV_NETMAP
85 #include "ena_netmap.h"
86 #endif /* DEV_NETMAP */
87
88 /*********************************************************
89  *  Function prototypes
90  *********************************************************/
91 static int ena_probe(device_t);
92 static void ena_intr_msix_mgmnt(void *);
93 static void ena_free_pci_resources(struct ena_adapter *);
94 static int ena_change_mtu(if_t, int);
95 static inline void ena_alloc_counters(counter_u64_t *, int);
96 static inline void ena_free_counters(counter_u64_t *, int);
97 static inline void ena_reset_counters(counter_u64_t *, int);
98 static void ena_init_io_rings_common(struct ena_adapter *, struct ena_ring *,
99     uint16_t);
100 static void ena_init_io_rings_basic(struct ena_adapter *);
101 static void ena_init_io_rings_advanced(struct ena_adapter *);
102 static void ena_init_io_rings(struct ena_adapter *);
103 static void ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
104 static void ena_free_all_io_rings_resources(struct ena_adapter *);
105 static int ena_setup_tx_dma_tag(struct ena_adapter *);
106 static int ena_free_tx_dma_tag(struct ena_adapter *);
107 static int ena_setup_rx_dma_tag(struct ena_adapter *);
108 static int ena_free_rx_dma_tag(struct ena_adapter *);
109 static void ena_release_all_tx_dmamap(struct ena_ring *);
110 static int ena_setup_tx_resources(struct ena_adapter *, int);
111 static void ena_free_tx_resources(struct ena_adapter *, int);
112 static int ena_setup_all_tx_resources(struct ena_adapter *);
113 static void ena_free_all_tx_resources(struct ena_adapter *);
114 static int ena_setup_rx_resources(struct ena_adapter *, unsigned int);
115 static void ena_free_rx_resources(struct ena_adapter *, unsigned int);
116 static int ena_setup_all_rx_resources(struct ena_adapter *);
117 static void ena_free_all_rx_resources(struct ena_adapter *);
118 static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
119     struct ena_rx_buffer *);
120 static void ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
121     struct ena_rx_buffer *);
122 static void ena_free_rx_bufs(struct ena_adapter *, unsigned int);
123 static void ena_refill_all_rx_bufs(struct ena_adapter *);
124 static void ena_free_all_rx_bufs(struct ena_adapter *);
125 static void ena_free_tx_bufs(struct ena_adapter *, unsigned int);
126 static void ena_free_all_tx_bufs(struct ena_adapter *);
127 static void ena_destroy_all_tx_queues(struct ena_adapter *);
128 static void ena_destroy_all_rx_queues(struct ena_adapter *);
129 static void ena_destroy_all_io_queues(struct ena_adapter *);
130 static int ena_create_io_queues(struct ena_adapter *);
131 static int ena_handle_msix(void *);
132 static int ena_enable_msix(struct ena_adapter *);
133 static void ena_setup_mgmnt_intr(struct ena_adapter *);
134 static int ena_setup_io_intr(struct ena_adapter *);
135 static int ena_request_mgmnt_irq(struct ena_adapter *);
136 static int ena_request_io_irq(struct ena_adapter *);
137 static void ena_free_mgmnt_irq(struct ena_adapter *);
138 static void ena_free_io_irq(struct ena_adapter *);
139 static void ena_free_irqs(struct ena_adapter *);
140 static void ena_disable_msix(struct ena_adapter *);
141 static void ena_unmask_all_io_irqs(struct ena_adapter *);
142 static int ena_up_complete(struct ena_adapter *);
143 static uint64_t ena_get_counter(if_t, ift_counter);
144 static int ena_media_change(if_t);
145 static void ena_media_status(if_t, struct ifmediareq *);
146 static void ena_init(void *);
147 static int ena_ioctl(if_t, u_long, caddr_t);
148 static int ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *);
149 static void ena_update_host_info(struct ena_admin_host_info *, if_t);
150 static void ena_update_hwassist(struct ena_adapter *);
151 static int ena_setup_ifnet(device_t, struct ena_adapter *,
152     struct ena_com_dev_get_features_ctx *);
153 static int ena_enable_wc(device_t, struct resource *);
154 static int ena_set_queues_placement_policy(device_t, struct ena_com_dev *,
155     struct ena_admin_feature_llq_desc *, struct ena_llq_configurations *);
156 static int ena_map_llq_mem_bar(device_t, struct ena_com_dev *);
157 static uint32_t ena_calc_max_io_queue_num(device_t, struct ena_com_dev *,
158     struct ena_com_dev_get_features_ctx *);
159 static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *);
160 static void ena_config_host_info(struct ena_com_dev *, device_t);
161 static int ena_attach(device_t);
162 static int ena_detach(device_t);
163 static int ena_device_init(struct ena_adapter *, device_t,
164     struct ena_com_dev_get_features_ctx *, int *);
165 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *);
166 static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
167 static void unimplemented_aenq_handler(void *, struct ena_admin_aenq_entry *);
168 static int ena_copy_eni_metrics(struct ena_adapter *);
169 static void ena_timer_service(void *);
170
171 static char ena_version[] = ENA_DEVICE_NAME ENA_DRV_MODULE_NAME
172     " v" ENA_DRV_MODULE_VERSION;
173
174 static ena_vendor_info_t ena_vendor_info_array[] = {
175         { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0 },
176         { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF_RSERV0, 0 },
177         { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0 },
178         { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF_RSERV0, 0 },
179         /* Last entry */
180         { 0, 0, 0 }
181 };
182
183 struct sx ena_global_lock;
184
185 /*
186  * Contains pointers to event handlers, e.g. link state change.
187  */
188 static struct ena_aenq_handlers aenq_handlers;
189
190 void
191 ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
192 {
193         if (error != 0)
194                 return;
195         *(bus_addr_t *)arg = segs[0].ds_addr;
196 }
197
198 int
199 ena_dma_alloc(device_t dmadev, bus_size_t size, ena_mem_handle_t *dma,
200     int mapflags, bus_size_t alignment, int domain)
201 {
202         struct ena_adapter *adapter = device_get_softc(dmadev);
203         device_t pdev = adapter->pdev;
204         uint32_t maxsize;
205         uint64_t dma_space_addr;
206         int error;
207
208         maxsize = ((size - 1) / PAGE_SIZE + 1) * PAGE_SIZE;
209
210         dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width);
211         if (unlikely(dma_space_addr == 0))
212                 dma_space_addr = BUS_SPACE_MAXADDR;
213
214         error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */
215             alignment, 0,      /* alignment, bounds             */
216             dma_space_addr,    /* lowaddr of exclusion window   */
217             BUS_SPACE_MAXADDR, /* highaddr of exclusion window  */
218             NULL, NULL,        /* filter, filterarg             */
219             maxsize,           /* maxsize                       */
220             1,                 /* nsegments                     */
221             maxsize,           /* maxsegsize                    */
222             BUS_DMA_ALLOCNOW,  /* flags                         */
223             NULL,              /* lockfunc                      */
224             NULL,              /* lockarg                       */
225             &dma->tag);
226         if (unlikely(error != 0)) {
227                 ena_log(pdev, ERR, "bus_dma_tag_create failed: %d\n", error);
228                 goto fail_tag;
229         }
230
231         error = bus_dma_tag_set_domain(dma->tag, domain);
232         if (unlikely(error != 0)) {
233                 ena_log(pdev, ERR, "bus_dma_tag_set_domain failed: %d\n",
234                     error);
235                 goto fail_map_create;
236         }
237
238         error = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr,
239             BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map);
240         if (unlikely(error != 0)) {
241                 ena_log(pdev, ERR, "bus_dmamem_alloc(%ju) failed: %d\n",
242                     (uintmax_t)size, error);
243                 goto fail_map_create;
244         }
245
246         dma->paddr = 0;
247         error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size,
248             ena_dmamap_callback, &dma->paddr, mapflags);
249         if (unlikely((error != 0) || (dma->paddr == 0))) {
250                 ena_log(pdev, ERR, "bus_dmamap_load failed: %d\n", error);
251                 goto fail_map_load;
252         }
253
254         bus_dmamap_sync(dma->tag, dma->map,
255             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
256
257         return (0);
258
259 fail_map_load:
260         bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
261 fail_map_create:
262         bus_dma_tag_destroy(dma->tag);
263 fail_tag:
264         dma->tag = NULL;
265         dma->vaddr = NULL;
266         dma->paddr = 0;
267
268         return (error);
269 }
270
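/*
 * Illustrative usage sketch (not part of the driver): a caller needing a
 * small, page-aligned coherent region would pair ena_dma_alloc() with the
 * teardown below. The helper name, size and domain are hypothetical; the
 * free sequence mirrors the error-unwind labels in ena_dma_alloc() itself.
 */
#if 0	/* example only */
static int
example_dma_region(device_t dev, ena_mem_handle_t *mem)
{
	int rc;

	/*
	 * dev must be the ena device itself; its softc supplies dma_width.
	 * One page, page-aligned, NUMA domain 0 (assumed for the example).
	 */
	rc = ena_dma_alloc(dev, PAGE_SIZE, mem, BUS_DMA_NOWAIT, PAGE_SIZE, 0);
	if (rc != 0)
		return (rc);

	/* ... use mem->vaddr (CPU) and mem->paddr (device) ... */

	bus_dmamap_unload(mem->tag, mem->map);
	bus_dmamem_free(mem->tag, mem->vaddr, mem->map);
	bus_dma_tag_destroy(mem->tag);
	return (0);
}
#endif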
271 static void
272 ena_free_pci_resources(struct ena_adapter *adapter)
273 {
274         device_t pdev = adapter->pdev;
275
276         if (adapter->memory != NULL) {
277                 bus_release_resource(pdev, SYS_RES_MEMORY,
278                     PCIR_BAR(ENA_MEM_BAR), adapter->memory);
279         }
280
281         if (adapter->registers != NULL) {
282                 bus_release_resource(pdev, SYS_RES_MEMORY,
283                     PCIR_BAR(ENA_REG_BAR), adapter->registers);
284         }
285
286         if (adapter->msix != NULL) {
287                 bus_release_resource(pdev, SYS_RES_MEMORY, adapter->msix_rid,
288                     adapter->msix);
289         }
290 }
291
292 static int
293 ena_probe(device_t dev)
294 {
295         ena_vendor_info_t *ent;
296         uint16_t pci_vendor_id = 0;
297         uint16_t pci_device_id = 0;
298
299         pci_vendor_id = pci_get_vendor(dev);
300         pci_device_id = pci_get_device(dev);
301
302         ent = ena_vendor_info_array;
303         while (ent->vendor_id != 0) {
304                 if ((pci_vendor_id == ent->vendor_id) &&
305                     (pci_device_id == ent->device_id)) {
306                         ena_log_raw(DBG, "vendor=%x device=%x\n", pci_vendor_id,
307                             pci_device_id);
308
309                         device_set_desc(dev, ENA_DEVICE_DESC);
310                         return (BUS_PROBE_DEFAULT);
311                 }
312
313                 ent++;
314         }
315
316         return (ENXIO);
317 }
318
319 static int
320 ena_change_mtu(if_t ifp, int new_mtu)
321 {
322         struct ena_adapter *adapter = if_getsoftc(ifp);
323         device_t pdev = adapter->pdev;
324         int rc;
325
326         if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
327                 ena_log(pdev, ERR, "Invalid MTU setting. new_mtu: %d max mtu: %d min mtu: %d\n",
328                     new_mtu, adapter->max_mtu, ENA_MIN_MTU);
329                 return (EINVAL);
330         }
331
332         rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
333         if (likely(rc == 0)) {
334                 ena_log(pdev, DBG, "set MTU to %d\n", new_mtu);
335                 if_setmtu(ifp, new_mtu);
336         } else {
337                 ena_log(pdev, ERR, "Failed to set MTU to %d\n", new_mtu);
338         }
339
340         return (rc);
341 }
342
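/*
 * For reference: this path is normally reached from userland through the
 * SIOCSIFMTU ioctl (e.g. `ifconfig ena0 mtu 9001`), which ena_ioctl()
 * dispatches here; the accepted range comes from the device's advertised
 * max_mtu and the driver's ENA_MIN_MTU.
 */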
343 static inline void
344 ena_alloc_counters(counter_u64_t *begin, int size)
345 {
346         counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
347
348         for (; begin < end; ++begin)
349                 *begin = counter_u64_alloc(M_WAITOK);
350 }
351
352 static inline void
353 ena_free_counters(counter_u64_t *begin, int size)
354 {
355         counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
356
357         for (; begin < end; ++begin)
358                 counter_u64_free(*begin);
359 }
360
361 static inline void
362 ena_reset_counters(counter_u64_t *begin, int size)
363 {
364         counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
365
366         for (; begin < end; ++begin)
367                 counter_u64_zero(*begin);
368 }
369
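/*
 * Note: the three helpers above treat a statistics structure as a packed
 * array of counter_u64_t, so they are only safe for structs whose every
 * member is a counter_u64_t (as the driver's Tx/Rx stats structs are). A
 * minimal sketch of that contract, using a hypothetical struct:
 */
#if 0	/* example only */
struct example_stats {
	counter_u64_t ok;	/* every member must be a counter_u64_t */
	counter_u64_t dropped;
};

static void
example_stats_lifecycle(void)
{
	struct example_stats st;

	ena_alloc_counters((counter_u64_t *)&st, sizeof(st));
	counter_u64_add(st.ok, 1);
	ena_reset_counters((counter_u64_t *)&st, sizeof(st));
	ena_free_counters((counter_u64_t *)&st, sizeof(st));
}
#endif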
370 static void
371 ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
372     uint16_t qid)
373 {
374         ring->qid = qid;
375         ring->adapter = adapter;
376         ring->ena_dev = adapter->ena_dev;
377         atomic_store_8(&ring->first_interrupt, 0);
378         ring->no_interrupt_event_cnt = 0;
379 }
380
381 static void
382 ena_init_io_rings_basic(struct ena_adapter *adapter)
383 {
384         struct ena_com_dev *ena_dev;
385         struct ena_ring *txr, *rxr;
386         struct ena_que *que;
387         int i;
388
389         ena_dev = adapter->ena_dev;
390
391         for (i = 0; i < adapter->num_io_queues; i++) {
392                 txr = &adapter->tx_ring[i];
393                 rxr = &adapter->rx_ring[i];
394
395                 /* TX/RX common ring state */
396                 ena_init_io_rings_common(adapter, txr, i);
397                 ena_init_io_rings_common(adapter, rxr, i);
398
399                 /* TX specific ring state */
400                 txr->tx_max_header_size = ena_dev->tx_max_header_size;
401                 txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
402
403                 que = &adapter->que[i];
404                 que->adapter = adapter;
405                 que->id = i;
406                 que->tx_ring = txr;
407                 que->rx_ring = rxr;
408
409                 txr->que = que;
410                 rxr->que = que;
411
412                 rxr->empty_rx_queue = 0;
413                 rxr->rx_mbuf_sz = ena_mbuf_sz;
414         }
415 }
416
417 static void
418 ena_init_io_rings_advanced(struct ena_adapter *adapter)
419 {
420         struct ena_ring *txr, *rxr;
421         int i;
422
423         for (i = 0; i < adapter->num_io_queues; i++) {
424                 txr = &adapter->tx_ring[i];
425                 rxr = &adapter->rx_ring[i];
426
427                 /* Allocate a buf ring */
428                 txr->buf_ring_size = adapter->buf_ring_size;
429                 txr->br = buf_ring_alloc(txr->buf_ring_size, M_DEVBUF, M_WAITOK,
430                     &txr->ring_mtx);
431
432                 /* Allocate Tx statistics. */
433                 ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
434                     sizeof(txr->tx_stats));
435                 txr->tx_last_cleanup_ticks = ticks;
436
437                 /* Allocate Rx statistics. */
438                 ena_alloc_counters((counter_u64_t *)&rxr->rx_stats,
439                     sizeof(rxr->rx_stats));
440
441                 /* Initialize locks */
442                 snprintf(txr->mtx_name, nitems(txr->mtx_name), "%s:tx(%d)",
443                     device_get_nameunit(adapter->pdev), i);
444                 snprintf(rxr->mtx_name, nitems(rxr->mtx_name), "%s:rx(%d)",
445                     device_get_nameunit(adapter->pdev), i);
446
447                 mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF);
448         }
449 }
450
451 static void
452 ena_init_io_rings(struct ena_adapter *adapter)
453 {
454         /*
455          * IO rings initialization can be divided into two steps:
456          *   1. Initialize variables and fields with initial values and copy
457          *      them from adapter/ena_dev (basic)
458          *   2. Allocate mutex, counters and buf_ring (advanced)
459          */
460         ena_init_io_rings_basic(adapter);
461         ena_init_io_rings_advanced(adapter);
462 }
463
464 static void
465 ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
466 {
467         struct ena_ring *txr = &adapter->tx_ring[qid];
468         struct ena_ring *rxr = &adapter->rx_ring[qid];
469
470         ena_free_counters((counter_u64_t *)&txr->tx_stats,
471             sizeof(txr->tx_stats));
472         ena_free_counters((counter_u64_t *)&rxr->rx_stats,
473             sizeof(rxr->rx_stats));
474
475         ENA_RING_MTX_LOCK(txr);
476         drbr_free(txr->br, M_DEVBUF);
477         ENA_RING_MTX_UNLOCK(txr);
478
479         mtx_destroy(&txr->ring_mtx);
480 }
481
482 static void
483 ena_free_all_io_rings_resources(struct ena_adapter *adapter)
484 {
485         int i;
486
487         for (i = 0; i < adapter->num_io_queues; i++)
488                 ena_free_io_ring_resources(adapter, i);
489 }
490
491 static int
492 ena_setup_tx_dma_tag(struct ena_adapter *adapter)
493 {
494         int ret;
495
496         /* Create DMA tag for Tx buffers */
497         ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev),
498             1, 0,                                 /* alignment, bounds       */
499             ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
500             BUS_SPACE_MAXADDR,                    /* highaddr of excl window */
501             NULL, NULL,                           /* filter, filterarg       */
502             ENA_TSO_MAXSIZE,                      /* maxsize                 */
503             adapter->max_tx_sgl_size - 1,         /* nsegments               */
504             ENA_TSO_MAXSIZE,                      /* maxsegsize              */
505             0,                                    /* flags                   */
506             NULL,                                 /* lockfunc                */
507             NULL,                                 /* lockfuncarg             */
508             &adapter->tx_buf_tag);
509
510         return (ret);
511 }
512
513 static int
514 ena_free_tx_dma_tag(struct ena_adapter *adapter)
515 {
516         int ret;
517
518         ret = bus_dma_tag_destroy(adapter->tx_buf_tag);
519
520         if (likely(ret == 0))
521                 adapter->tx_buf_tag = NULL;
522
523         return (ret);
524 }
525
526 static int
527 ena_setup_rx_dma_tag(struct ena_adapter *adapter)
528 {
529         int ret;
530
531         /* Create DMA tag for Rx buffers */
532         ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent   */
533             1, 0,                                 /* alignment, bounds       */
534             ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
535             BUS_SPACE_MAXADDR,                    /* highaddr of excl window */
536             NULL, NULL,                           /* filter, filterarg       */
537             ena_mbuf_sz,                          /* maxsize                 */
538             adapter->max_rx_sgl_size,             /* nsegments               */
539             ena_mbuf_sz,                          /* maxsegsize              */
540             0,                                    /* flags                   */
541             NULL,                                 /* lockfunc                */
542             NULL,                                 /* lockarg                 */
543             &adapter->rx_buf_tag);
544
545         return (ret);
546 }
547
548 static int
549 ena_free_rx_dma_tag(struct ena_adapter *adapter)
550 {
551         int ret;
552
553         ret = bus_dma_tag_destroy(adapter->rx_buf_tag);
554
555         if (likely(ret == 0))
556                 adapter->rx_buf_tag = NULL;
557
558         return (ret);
559 }
560
561 static void
562 ena_release_all_tx_dmamap(struct ena_ring *tx_ring)
563 {
564         struct ena_adapter *adapter = tx_ring->adapter;
565         struct ena_tx_buffer *tx_info;
566         bus_dma_tag_t tx_tag = adapter->tx_buf_tag;
567         int i;
568 #ifdef DEV_NETMAP
569         struct ena_netmap_tx_info *nm_info;
570         int j;
571 #endif /* DEV_NETMAP */
572
573         for (i = 0; i < tx_ring->ring_size; ++i) {
574                 tx_info = &tx_ring->tx_buffer_info[i];
575 #ifdef DEV_NETMAP
576                 if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) {
577                         nm_info = &tx_info->nm_info;
578                         for (j = 0; j < ENA_PKT_MAX_BUFS; ++j) {
579                                 if (nm_info->map_seg[j] != NULL) {
580                                         bus_dmamap_destroy(tx_tag,
581                                             nm_info->map_seg[j]);
582                                         nm_info->map_seg[j] = NULL;
583                                 }
584                         }
585                 }
586 #endif /* DEV_NETMAP */
587                 if (tx_info->dmamap != NULL) {
588                         bus_dmamap_destroy(tx_tag, tx_info->dmamap);
589                         tx_info->dmamap = NULL;
590                 }
591         }
592 }
593
594 /**
595  * ena_setup_tx_resources - allocate Tx resources (Descriptors)
596  * @adapter: network interface device structure
597  * @qid: queue index
598  *
599  * Returns 0 on success, or an error code on failure.
600  **/
601 static int
602 ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
603 {
604         device_t pdev = adapter->pdev;
605         char thread_name[MAXCOMLEN + 1];
606         struct ena_que *que = &adapter->que[qid];
607         struct ena_ring *tx_ring = que->tx_ring;
608         cpuset_t *cpu_mask = NULL;
609         int size, i, err;
610 #ifdef DEV_NETMAP
611         bus_dmamap_t *map;
612         int j;
613
614         ena_netmap_reset_tx_ring(adapter, qid);
615 #endif /* DEV_NETMAP */
616
617         size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
618
619         tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
620         if (unlikely(tx_ring->tx_buffer_info == NULL))
621                 return (ENOMEM);
622
623         size = sizeof(uint16_t) * tx_ring->ring_size;
624         tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
625         if (unlikely(tx_ring->free_tx_ids == NULL))
626                 goto err_buf_info_free;
627
628         size = tx_ring->tx_max_header_size;
629         tx_ring->push_buf_intermediate_buf = malloc(size, M_DEVBUF,
630             M_NOWAIT | M_ZERO);
631         if (unlikely(tx_ring->push_buf_intermediate_buf == NULL))
632                 goto err_tx_ids_free;
633
634         /* Req id stack for TX OOO completions */
635         for (i = 0; i < tx_ring->ring_size; i++)
636                 tx_ring->free_tx_ids[i] = i;
637
638         /* Reset TX statistics. */
639         ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats,
640             sizeof(tx_ring->tx_stats));
641
642         tx_ring->next_to_use = 0;
643         tx_ring->next_to_clean = 0;
644         tx_ring->acum_pkts = 0;
645
646         /* Make sure that drbr is empty */
647         ENA_RING_MTX_LOCK(tx_ring);
648         drbr_flush(adapter->ifp, tx_ring->br);
649         ENA_RING_MTX_UNLOCK(tx_ring);
650
651         /* ... and create the buffer DMA maps */
652         for (i = 0; i < tx_ring->ring_size; i++) {
653                 err = bus_dmamap_create(adapter->tx_buf_tag, 0,
654                     &tx_ring->tx_buffer_info[i].dmamap);
655                 if (unlikely(err != 0)) {
656                         ena_log(pdev, ERR,
657                             "Unable to create Tx DMA map for buffer %d\n", i);
658                         goto err_map_release;
659                 }
660
661 #ifdef DEV_NETMAP
662                 if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) {
663                         map = tx_ring->tx_buffer_info[i].nm_info.map_seg;
664                         for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
665                                 err = bus_dmamap_create(adapter->tx_buf_tag, 0,
666                                     &map[j]);
667                                 if (unlikely(err != 0)) {
668                                         ena_log(pdev, ERR,
669                                             "Unable to create Tx DMA map for buffer %d %d\n",
670                                             i, j);
671                                         goto err_map_release;
672                                 }
673                         }
674                 }
675 #endif /* DEV_NETMAP */
676         }
677
678         /* Allocate taskqueues */
679         TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring);
680         tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT,
681             taskqueue_thread_enqueue, &tx_ring->enqueue_tq);
682         if (unlikely(tx_ring->enqueue_tq == NULL)) {
683                 ena_log(pdev, ERR,
684                     "Unable to create taskqueue for enqueue task\n");
685                 i = tx_ring->ring_size;
686                 goto err_map_release;
687         }
688
689         tx_ring->running = true;
690
691 #ifdef RSS
692         cpu_mask = &que->cpu_mask;
693         snprintf(thread_name, sizeof(thread_name), "%s txeq %d",
694             device_get_nameunit(adapter->pdev), que->cpu);
695 #else
696         snprintf(thread_name, sizeof(thread_name), "%s txeq %d",
697             device_get_nameunit(adapter->pdev), que->id);
698 #endif
699         taskqueue_start_threads_cpuset(&tx_ring->enqueue_tq, 1, PI_NET,
700             cpu_mask, "%s", thread_name);
701
702         return (0);
703
704 err_map_release:
705         ena_release_all_tx_dmamap(tx_ring);
706 err_tx_ids_free:
707         free(tx_ring->free_tx_ids, M_DEVBUF);
708         tx_ring->free_tx_ids = NULL;
709 err_buf_info_free:
710         free(tx_ring->tx_buffer_info, M_DEVBUF);
711         tx_ring->tx_buffer_info = NULL;
712
713         return (ENOMEM);
714 }
715
716 /**
717  * ena_free_tx_resources - Free Tx Resources per Queue
718  * @adapter: network interface device structure
719  * @qid: queue index
720  *
721  * Free all transmit software resources
722  **/
723 static void
724 ena_free_tx_resources(struct ena_adapter *adapter, int qid)
725 {
726         struct ena_ring *tx_ring = &adapter->tx_ring[qid];
727 #ifdef DEV_NETMAP
728         struct ena_netmap_tx_info *nm_info;
729         int j;
730 #endif /* DEV_NETMAP */
731
732         while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task, NULL))
733                 taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
734
735         taskqueue_free(tx_ring->enqueue_tq);
736
737         ENA_RING_MTX_LOCK(tx_ring);
738         /* Flush buffer ring, */
739         drbr_flush(adapter->ifp, tx_ring->br);
740
741         /* Free buffer DMA maps, */
742         for (int i = 0; i < tx_ring->ring_size; i++) {
743                 bus_dmamap_sync(adapter->tx_buf_tag,
744                     tx_ring->tx_buffer_info[i].dmamap, BUS_DMASYNC_POSTWRITE);
745                 bus_dmamap_unload(adapter->tx_buf_tag,
746                     tx_ring->tx_buffer_info[i].dmamap);
747                 bus_dmamap_destroy(adapter->tx_buf_tag,
748                     tx_ring->tx_buffer_info[i].dmamap);
749
750 #ifdef DEV_NETMAP
751                 if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) {
752                         nm_info = &tx_ring->tx_buffer_info[i].nm_info;
753                         for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
754                                 if (nm_info->socket_buf_idx[j] != 0) {
755                                         bus_dmamap_sync(adapter->tx_buf_tag,
756                                             nm_info->map_seg[j],
757                                             BUS_DMASYNC_POSTWRITE);
758                                         ena_netmap_unload(adapter,
759                                             nm_info->map_seg[j]);
760                                 }
761                                 bus_dmamap_destroy(adapter->tx_buf_tag,
762                                     nm_info->map_seg[j]);
763                                 nm_info->socket_buf_idx[j] = 0;
764                         }
765                 }
766 #endif /* DEV_NETMAP */
767
768                 m_freem(tx_ring->tx_buffer_info[i].mbuf);
769                 tx_ring->tx_buffer_info[i].mbuf = NULL;
770         }
771         ENA_RING_MTX_UNLOCK(tx_ring);
772
773         /* And free allocated memory. */
774         free(tx_ring->tx_buffer_info, M_DEVBUF);
775         tx_ring->tx_buffer_info = NULL;
776
777         free(tx_ring->free_tx_ids, M_DEVBUF);
778         tx_ring->free_tx_ids = NULL;
779
780         free(tx_ring->push_buf_intermediate_buf, M_DEVBUF);
781         tx_ring->push_buf_intermediate_buf = NULL;
782 }
783
784 /**
785  * ena_setup_all_tx_resources - allocate all queues Tx resources
786  * @adapter: network interface device structure
787  *
788  * Returns 0 on success, or an error code on failure.
789  **/
790 static int
791 ena_setup_all_tx_resources(struct ena_adapter *adapter)
792 {
793         int i, rc;
794
795         for (i = 0; i < adapter->num_io_queues; i++) {
796                 rc = ena_setup_tx_resources(adapter, i);
797                 if (rc != 0) {
798                         ena_log(adapter->pdev, ERR,
799                             "Allocation for Tx Queue %u failed\n", i);
800                         goto err_setup_tx;
801                 }
802         }
803
804         return (0);
805
806 err_setup_tx:
807         /* Rewind the index freeing the rings as we go */
808         while (i--)
809                 ena_free_tx_resources(adapter, i);
810         return (rc);
811 }
812
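/*
 * Note on the unwind idiom above: after a failure at index i, `while (i--)`
 * visits indices i-1 down to 0, i.e. exactly the queues that were set up
 * successfully. For example, with num_io_queues = 4 and a failure at i = 2,
 * only queues 1 and 0 are freed. ena_setup_all_rx_resources() below uses
 * the same pattern.
 */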
813 /**
814  * ena_free_all_tx_resources - Free Tx Resources for All Queues
815  * @adapter: network interface device structure
816  *
817  * Free all transmit software resources
818  **/
819 static void
820 ena_free_all_tx_resources(struct ena_adapter *adapter)
821 {
822         int i;
823
824         for (i = 0; i < adapter->num_io_queues; i++)
825                 ena_free_tx_resources(adapter, i);
826 }
827
828 /**
829  * ena_setup_rx_resources - allocate Rx resources (Descriptors)
830  * @adapter: network interface device structure
831  * @qid: queue index
832  *
833  * Returns 0 on success, or an error code on failure.
834  **/
835 static int
836 ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
837 {
838         device_t pdev = adapter->pdev;
839         struct ena_que *que = &adapter->que[qid];
840         struct ena_ring *rx_ring = que->rx_ring;
841         int size, err, i;
842
843         size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size;
844
845 #ifdef DEV_NETMAP
846         ena_netmap_reset_rx_ring(adapter, qid);
847         rx_ring->initialized = false;
848 #endif /* DEV_NETMAP */
849
850         /*
851          * Alloc an extra element so that in the rx path
852          * we can always prefetch rx_info + 1.
853          */
854         size += sizeof(struct ena_rx_buffer);
855
856         rx_ring->rx_buffer_info = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
857
858         size = sizeof(uint16_t) * rx_ring->ring_size;
859         rx_ring->free_rx_ids = malloc(size, M_DEVBUF, M_WAITOK);
860
861         for (i = 0; i < rx_ring->ring_size; i++)
862                 rx_ring->free_rx_ids[i] = i;
863
864         /* Reset RX statistics. */
865         ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats,
866             sizeof(rx_ring->rx_stats));
867
868         rx_ring->next_to_clean = 0;
869         rx_ring->next_to_use = 0;
870
871         /* ... and create the buffer DMA maps */
872         for (i = 0; i < rx_ring->ring_size; i++) {
873                 err = bus_dmamap_create(adapter->rx_buf_tag, 0,
874                     &(rx_ring->rx_buffer_info[i].map));
875                 if (err != 0) {
876                         ena_log(pdev, ERR,
877                             "Unable to create Rx DMA map for buffer %d\n", i);
878                         goto err_buf_info_unmap;
879                 }
880         }
881
882         /* Create LRO for the ring */
883         if ((if_getcapenable(adapter->ifp) & IFCAP_LRO) != 0) {
884                 int err = tcp_lro_init(&rx_ring->lro);
885                 if (err != 0) {
886                         ena_log(pdev, ERR, "LRO[%d] Initialization failed!\n",
887                             qid);
888                 } else {
889                         ena_log(pdev, DBG, "RX Soft LRO[%d] Initialized\n",
890                             qid);
891                         rx_ring->lro.ifp = adapter->ifp;
892                 }
893         }
894
895         return (0);
896
897 err_buf_info_unmap:
898         while (i--) {
899                 bus_dmamap_destroy(adapter->rx_buf_tag,
900                     rx_ring->rx_buffer_info[i].map);
901         }
902
903         free(rx_ring->free_rx_ids, M_DEVBUF);
904         rx_ring->free_rx_ids = NULL;
905         free(rx_ring->rx_buffer_info, M_DEVBUF);
906         rx_ring->rx_buffer_info = NULL;
907         return (ENOMEM);
908 }
909
910 /**
911  * ena_free_rx_resources - Free Rx Resources
912  * @adapter: network interface device structure
913  * @qid: queue index
914  *
915  * Free all receive software resources
916  **/
917 static void
918 ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid)
919 {
920         struct ena_ring *rx_ring = &adapter->rx_ring[qid];
921
922         /* Free buffer DMA maps, */
923         for (int i = 0; i < rx_ring->ring_size; i++) {
924                 bus_dmamap_sync(adapter->rx_buf_tag,
925                     rx_ring->rx_buffer_info[i].map, BUS_DMASYNC_POSTREAD);
926                 m_freem(rx_ring->rx_buffer_info[i].mbuf);
927                 rx_ring->rx_buffer_info[i].mbuf = NULL;
928                 bus_dmamap_unload(adapter->rx_buf_tag,
929                     rx_ring->rx_buffer_info[i].map);
930                 bus_dmamap_destroy(adapter->rx_buf_tag,
931                     rx_ring->rx_buffer_info[i].map);
932         }
933
934         /* free LRO resources, */
935         tcp_lro_free(&rx_ring->lro);
936
937         /* free allocated memory */
938         free(rx_ring->rx_buffer_info, M_DEVBUF);
939         rx_ring->rx_buffer_info = NULL;
940
941         free(rx_ring->free_rx_ids, M_DEVBUF);
942         rx_ring->free_rx_ids = NULL;
943 }
944
945 /**
946  * ena_setup_all_rx_resources - allocate all queues Rx resources
947  * @adapter: network interface device structure
948  *
949  * Returns 0 on success, or an error code on failure.
950  **/
951 static int
952 ena_setup_all_rx_resources(struct ena_adapter *adapter)
953 {
954         int i, rc = 0;
955
956         for (i = 0; i < adapter->num_io_queues; i++) {
957                 rc = ena_setup_rx_resources(adapter, i);
958                 if (rc != 0) {
959                         ena_log(adapter->pdev, ERR,
960                             "Allocation for Rx Queue %u failed\n", i);
961                         goto err_setup_rx;
962                 }
963         }
964         return (0);
965
966 err_setup_rx:
967         /* rewind the index freeing the rings as we go */
968         while (i--)
969                 ena_free_rx_resources(adapter, i);
970         return (rc);
971 }
972
973 /**
974  * ena_free_all_rx_resources - Free Rx resources for all queues
975  * @adapter: network interface device structure
976  *
977  * Free all receive software resources
978  **/
979 static void
980 ena_free_all_rx_resources(struct ena_adapter *adapter)
981 {
982         int i;
983
984         for (i = 0; i < adapter->num_io_queues; i++)
985                 ena_free_rx_resources(adapter, i);
986 }
987
988 static inline int
989 ena_alloc_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
990     struct ena_rx_buffer *rx_info)
991 {
992         device_t pdev = adapter->pdev;
993         struct ena_com_buf *ena_buf;
994         bus_dma_segment_t segs[1];
995         int nsegs, error;
996         int mlen;
997
998         /* if the previously allocated frag is still unused */
999         if (unlikely(rx_info->mbuf != NULL))
1000                 return (0);
1001
1002         /* Get mbuf using UMA allocator */
1003         rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1004             rx_ring->rx_mbuf_sz);
1005
1006         if (unlikely(rx_info->mbuf == NULL)) {
1007                 counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
1008                 rx_info->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1009                 if (unlikely(rx_info->mbuf == NULL)) {
1010                         counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
1011                         return (ENOMEM);
1012                 }
1013                 mlen = MCLBYTES;
1014         } else {
1015                 mlen = rx_ring->rx_mbuf_sz;
1016         }
1017         /* Set mbuf length */
1018         rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;
1019
1020         /* Map packets for DMA */
1021         ena_log(pdev, DBG,
1022             "Using tag %p for buffers' DMA mapping, mbuf %p len: %d\n",
1023             adapter->rx_buf_tag, rx_info->mbuf, rx_info->mbuf->m_len);
1024         error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map,
1025             rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
1026         if (unlikely((error != 0) || (nsegs != 1))) {
1027                 ena_log(pdev, WARN,
1028                     "failed to map mbuf, error: %d, nsegs: %d\n", error, nsegs);
1029                 counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1);
1030                 goto exit;
1031         }
1032
1033         bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD);
1034
1035         ena_buf = &rx_info->ena_buf;
1036         ena_buf->paddr = segs[0].ds_addr;
1037         ena_buf->len = mlen;
1038
1039         ena_log(pdev, DBG,
1040             "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n",
1041             rx_info->mbuf, rx_info, ena_buf->len, (uintmax_t)ena_buf->paddr);
1042
1043         return (0);
1044
1045 exit:
1046         m_freem(rx_info->mbuf);
1047         rx_info->mbuf = NULL;
1048         return (EFAULT);
1049 }
1050
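/*
 * Allocation note: rx_mbuf_sz is the ring's preferred cluster size (a jumbo
 * cluster when large MTUs are in use), and on pool exhaustion the code above
 * falls back to a standard MCLBYTES cluster while counting the miss in
 * mjum_alloc_fail. Refills therefore degrade to smaller buffers under memory
 * pressure instead of failing outright.
 */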
1051 static void
1052 ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
1053     struct ena_rx_buffer *rx_info)
1054 {
1055         if (rx_info->mbuf == NULL) {
1056                 ena_log(adapter->pdev, WARN,
1057                     "Trying to free unallocated buffer\n");
1058                 return;
1059         }
1060
1061         bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
1062             BUS_DMASYNC_POSTREAD);
1063         bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map);
1064         m_freem(rx_info->mbuf);
1065         rx_info->mbuf = NULL;
1066 }
1067
1068 /**
1069  * ena_refill_rx_bufs - Refills ring with descriptors
1070  * @rx_ring: the ring which we want to feed with free descriptors
1071  * @num: number of descriptors to refill
1072  * Refills the ring with newly allocated DMA-mapped mbufs for receiving
1073  **/
1074 int
1075 ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
1076 {
1077         struct ena_adapter *adapter = rx_ring->adapter;
1078         device_t pdev = adapter->pdev;
1079         uint16_t next_to_use, req_id;
1080         uint32_t i;
1081         int rc;
1082
1083         ena_log_io(adapter->pdev, DBG, "refill qid: %d\n", rx_ring->qid);
1084
1085         next_to_use = rx_ring->next_to_use;
1086
1087         for (i = 0; i < num; i++) {
1088                 struct ena_rx_buffer *rx_info;
1089
1090                 ena_log_io(pdev, DBG, "RX buffer - next to use: %d\n",
1091                     next_to_use);
1092
1093                 req_id = rx_ring->free_rx_ids[next_to_use];
1094                 rx_info = &rx_ring->rx_buffer_info[req_id];
1095 #ifdef DEV_NETMAP
1096                 if (ena_rx_ring_in_netmap(adapter, rx_ring->qid))
1097                         rc = ena_netmap_alloc_rx_slot(adapter, rx_ring,
1098                             rx_info);
1099                 else
1100 #endif /* DEV_NETMAP */
1101                         rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
1102                 if (unlikely(rc != 0)) {
1103                         ena_log_io(pdev, WARN,
1104                             "failed to alloc buffer for rx queue %d\n",
1105                             rx_ring->qid);
1106                         break;
1107                 }
1108                 rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
1109                     &rx_info->ena_buf, req_id);
1110                 if (unlikely(rc != 0)) {
1111                         ena_log_io(pdev, WARN,
1112                             "failed to add buffer for rx queue %d\n",
1113                             rx_ring->qid);
1114                         break;
1115                 }
1116                 next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
1117                     rx_ring->ring_size);
1118         }
1119
1120         if (unlikely(i < num)) {
1121                 counter_u64_add(rx_ring->rx_stats.refil_partial, 1);
1122                 ena_log_io(pdev, WARN,
1123                     "refilled rx qid %d with only %d mbufs (from %d)\n",
1124                     rx_ring->qid, i, num);
1125         }
1126
1127         if (likely(i != 0))
1128                 ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
1129
1130         rx_ring->next_to_use = next_to_use;
1131         return (i);
1132 }
1133
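/*
 * Illustrative caller sketch (hypothetical helper): the Rx cleanup path
 * would typically refill as many descriptors as it just consumed and treat
 * a short return as a transient mbuf shortage, relying on a later refill
 * to top the ring back up.
 */
#if 0	/* example only */
static void
example_refill_after_cleanup(struct ena_ring *rx_ring, uint32_t completed)
{
	int refilled;

	refilled = ena_refill_rx_bufs(rx_ring, completed);
	if (refilled < (int)completed) {
		/* Transient allocation pressure; not a fatal error. */
	}
}
#endif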
1134 int
1135 ena_update_buf_ring_size(struct ena_adapter *adapter,
1136     uint32_t new_buf_ring_size)
1137 {
1138         uint32_t old_buf_ring_size;
1139         int rc = 0;
1140         bool dev_was_up;
1141
1142         old_buf_ring_size = adapter->buf_ring_size;
1143         adapter->buf_ring_size = new_buf_ring_size;
1144
1145         dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1146         ena_down(adapter);
1147
1148         /* Reconfigure buf ring for all Tx rings. */
1149         ena_free_all_io_rings_resources(adapter);
1150         ena_init_io_rings_advanced(adapter);
1151         if (dev_was_up) {
1152                 /*
1153                  * If ena_up() fails, it's not because of the recent
1154                  * buf_ring size change. In that case we simply revert to
1155                  * the old drbr value and trigger a reset, as something
1156                  * else must have gone wrong.
1157                  */
1158                 rc = ena_up(adapter);
1159                 if (unlikely(rc != 0)) {
1160                         ena_log(adapter->pdev, ERR,
1161                             "Failed to configure device after setting new drbr size: %u. Reverting old value: %u and triggering the reset\n",
1162                             new_buf_ring_size, old_buf_ring_size);
1163
1164                         /* Revert old size and trigger the reset */
1165                         adapter->buf_ring_size = old_buf_ring_size;
1166                         ena_free_all_io_rings_resources(adapter);
1167                         ena_init_io_rings_advanced(adapter);
1168
1169                         ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET,
1170                             adapter);
1171                         ena_trigger_reset(adapter, ENA_REGS_RESET_OS_TRIGGER);
1172                 }
1173         }
1174
1175         return (rc);
1176 }
1177
1178 int
1179 ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
1180     uint32_t new_rx_size)
1181 {
1182         uint32_t old_tx_size, old_rx_size;
1183         int rc = 0;
1184         bool dev_was_up;
1185
1186         old_tx_size = adapter->requested_tx_ring_size;
1187         old_rx_size = adapter->requested_rx_ring_size;
1188         adapter->requested_tx_ring_size = new_tx_size;
1189         adapter->requested_rx_ring_size = new_rx_size;
1190
1191         dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1192         ena_down(adapter);
1193
1194         /* Configure queues with new size. */
1195         ena_init_io_rings_basic(adapter);
1196         if (dev_was_up) {
1197                 rc = ena_up(adapter);
1198                 if (unlikely(rc != 0)) {
1199                         ena_log(adapter->pdev, ERR,
1200                             "Failed to configure device with the new sizes - Tx: %u Rx: %u. Reverting old values - Tx: %u Rx: %u\n",
1201                             new_tx_size, new_rx_size, old_tx_size, old_rx_size);
1202
1203                         /* Revert old size. */
1204                         adapter->requested_tx_ring_size = old_tx_size;
1205                         adapter->requested_rx_ring_size = old_rx_size;
1206                         ena_init_io_rings_basic(adapter);
1207
1208                         /* And try again. */
1209                         rc = ena_up(adapter);
1210                         if (unlikely(rc != 0)) {
1211                                 ena_log(adapter->pdev, ERR,
1212                                     "Failed to revert old queue sizes. Triggering device reset.\n");
1213                                 /*
1214                          * If we've failed again, something must have
1215                          * gone wrong. After the reset, the device should
1216                          * try to come up.
1217                                  */
1218                                 ENA_FLAG_SET_ATOMIC(
1219                                     ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1220                                 ena_trigger_reset(adapter,
1221                                     ENA_REGS_RESET_OS_TRIGGER);
1222                         }
1223                 }
1224         }
1225
1226         return (rc);
1227 }
1228
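/*
 * The ena_update_*() reconfiguration helpers in this file all share one
 * shape: bring the interface down, apply the new setting, and bring it back
 * up; if ena_up() fails, revert the setting and retry, and only if the
 * revert also fails, schedule a device reset. Condensed, hypothetical
 * sketch of that pattern:
 */
#if 0	/* example only */
static int
example_reconfigure(struct ena_adapter *adapter,
    void (*apply)(struct ena_adapter *), void (*revert)(struct ena_adapter *))
{
	bool dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
	int rc = 0;

	ena_down(adapter);
	apply(adapter);
	if (dev_was_up && (rc = ena_up(adapter)) != 0) {
		revert(adapter);
		rc = ena_up(adapter);
		if (rc != 0) {
			ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET,
			    adapter);
			ena_trigger_reset(adapter, ENA_REGS_RESET_OS_TRIGGER);
		}
	}
	return (rc);
}
#endif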
1229 static void
1230 ena_update_io_rings(struct ena_adapter *adapter, uint32_t num)
1231 {
1232         ena_free_all_io_rings_resources(adapter);
1233         /* Force indirection table to be reinitialized */
1234         ena_com_rss_destroy(adapter->ena_dev);
1235
1236         adapter->num_io_queues = num;
1237         ena_init_io_rings(adapter);
1238 }
1239
1240 int
1241 ena_update_base_cpu(struct ena_adapter *adapter, int new_num)
1242 {
1243         int old_num;
1244         int rc = 0;
1245         bool dev_was_up;
1246
1247         dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1248         old_num = adapter->irq_cpu_base;
1249
1250         ena_down(adapter);
1251
1252         adapter->irq_cpu_base = new_num;
1253
1254         if (dev_was_up) {
1255                 rc = ena_up(adapter);
1256                 if (unlikely(rc != 0)) {
1257                         ena_log(adapter->pdev, ERR,
1258                             "Failed to configure device with IRQ base CPU %d. "
1259                             "Reverting to previous value: %d\n",
1260                             new_num, old_num);
1261
1262                         adapter->irq_cpu_base = old_num;
1263
1264                         rc = ena_up(adapter);
1265                         if (unlikely(rc != 0)) {
1266                                 ena_log(adapter->pdev, ERR,
1267                                     "Failed to revert to previous setup. "
1268                                     "Triggering device reset.\n");
1269                                 ENA_FLAG_SET_ATOMIC(
1270                                     ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1271                                 ena_trigger_reset(adapter,
1272                                     ENA_REGS_RESET_OS_TRIGGER);
1273                         }
1274                 }
1275         }
1276         return (rc);
1277 }
1278
1279 int
1280 ena_update_cpu_stride(struct ena_adapter *adapter, uint32_t new_num)
1281 {
1282         uint32_t old_num;
1283         int rc = 0;
1284         bool dev_was_up;
1285
1286         dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1287         old_num = adapter->irq_cpu_stride;
1288
1289         ena_down(adapter);
1290
1291         adapter->irq_cpu_stride = new_num;
1292
1293         if (dev_was_up) {
1294                 rc = ena_up(adapter);
1295                 if (unlikely(rc != 0)) {
1296                         ena_log(adapter->pdev, ERR,
1297                             "Failed to configure device with IRQ CPU stride %u. "
1298                             "Reverting to previous value: %u\n",
1299                             new_num, old_num);
1300
1301                         adapter->irq_cpu_stride = old_num;
1302
1303                         rc = ena_up(adapter);
1304                         if (unlikely(rc != 0)) {
1305                                 ena_log(adapter->pdev, ERR,
1306                                     "Failed to revert to previous setup. "
1307                                     "Triggering device reset.\n");
1308                                 ENA_FLAG_SET_ATOMIC(
1309                                     ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1310                                 ena_trigger_reset(adapter,
1311                                     ENA_REGS_RESET_OS_TRIGGER);
1312                         }
1313                 }
1314         }
1315         return (rc);
1316 }
1317
1318 /* Caller should sanitize new_num */
1319 int
1320 ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num)
1321 {
1322         uint32_t old_num;
1323         int rc = 0;
1324         bool dev_was_up;
1325
1326         dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1327         old_num = adapter->num_io_queues;
1328         ena_down(adapter);
1329
1330         ena_update_io_rings(adapter, new_num);
1331
1332         if (dev_was_up) {
1333                 rc = ena_up(adapter);
1334                 if (unlikely(rc != 0)) {
1335                         ena_log(adapter->pdev, ERR,
1336                             "Failed to configure device with %u IO queues. "
1337                             "Reverting to previous value: %u\n",
1338                             new_num, old_num);
1339
1340                         ena_update_io_rings(adapter, old_num);
1341
1342                         rc = ena_up(adapter);
1343                         if (unlikely(rc != 0)) {
1344                                 ena_log(adapter->pdev, ERR,
1345                                     "Failed to revert to previous setup IO "
1346                                     "queues. Triggering device reset.\n");
1347                                 ENA_FLAG_SET_ATOMIC(
1348                                     ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1349                                 ena_trigger_reset(adapter,
1350                                     ENA_REGS_RESET_OS_TRIGGER);
1351                         }
1352                 }
1353         }
1354
1355         return (rc);
1356 }
1357
1358 static void
1359 ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
1360 {
1361         struct ena_ring *rx_ring = &adapter->rx_ring[qid];
1362         unsigned int i;
1363
1364         for (i = 0; i < rx_ring->ring_size; i++) {
1365                 struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
1366
1367                 if (rx_info->mbuf != NULL)
1368                         ena_free_rx_mbuf(adapter, rx_ring, rx_info);
1369 #ifdef DEV_NETMAP
1370                 if (((if_getflags(adapter->ifp) & IFF_DYING) == 0) &&
1371                     (if_getcapenable(adapter->ifp) & IFCAP_NETMAP)) {
1372                         if (rx_info->netmap_buf_idx != 0)
1373                                 ena_netmap_free_rx_slot(adapter, rx_ring,
1374                                     rx_info);
1375                 }
1376 #endif /* DEV_NETMAP */
1377         }
1378 }
1379
1380 /**
1381  * ena_refill_all_rx_bufs - allocate all queues Rx buffers
1382  * @adapter: network interface device structure
1383  *
1384  */
1385 static void
1386 ena_refill_all_rx_bufs(struct ena_adapter *adapter)
1387 {
1388         struct ena_ring *rx_ring;
1389         int i, rc, bufs_num;
1390
1391         for (i = 0; i < adapter->num_io_queues; i++) {
1392                 rx_ring = &adapter->rx_ring[i];
1393                 bufs_num = rx_ring->ring_size - 1;
1394                 rc = ena_refill_rx_bufs(rx_ring, bufs_num);
1395                 if (unlikely(rc != bufs_num))
1396                         ena_log_io(adapter->pdev, WARN,
1397                             "refilling Queue %d failed. "
1398                             "Allocated %d buffers from: %d\n",
1399                             i, rc, bufs_num);
1400 #ifdef DEV_NETMAP
1401                 rx_ring->initialized = true;
1402 #endif /* DEV_NETMAP */
1403         }
1404 }
1405
1406 static void
1407 ena_free_all_rx_bufs(struct ena_adapter *adapter)
1408 {
1409         int i;
1410
1411         for (i = 0; i < adapter->num_io_queues; i++)
1412                 ena_free_rx_bufs(adapter, i);
1413 }
1414
1415 /**
1416  * ena_free_tx_bufs - Free Tx Buffers per Queue
1417  * @adapter: network interface device structure
1418  * @qid: queue index
1419  **/
1420 static void
1421 ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid)
1422 {
1423         bool print_once = true;
1424         struct ena_ring *tx_ring = &adapter->tx_ring[qid];
1425
1426         ENA_RING_MTX_LOCK(tx_ring);
1427         for (int i = 0; i < tx_ring->ring_size; i++) {
1428                 struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
1429
1430                 if (tx_info->mbuf == NULL)
1431                         continue;
1432
1433                 if (print_once) {
1434                         ena_log(adapter->pdev, WARN,
1435                             "free uncompleted tx mbuf qid %d idx 0x%x\n", qid,
1436                             i);
1437                         print_once = false;
1438                 } else {
1439                         ena_log(adapter->pdev, DBG,
1440                             "free uncompleted tx mbuf qid %d idx 0x%x\n", qid,
1441                             i);
1442                 }
1443
1444                 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
1445                     BUS_DMASYNC_POSTWRITE);
1446                 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
1447
1448                 m_free(tx_info->mbuf);
1449                 tx_info->mbuf = NULL;
1450         }
1451         ENA_RING_MTX_UNLOCK(tx_ring);
1452 }
1453
1454 static void
1455 ena_free_all_tx_bufs(struct ena_adapter *adapter)
1456 {
1457         for (int i = 0; i < adapter->num_io_queues; i++)
1458                 ena_free_tx_bufs(adapter, i);
1459 }
1460
1461 static void
1462 ena_destroy_all_tx_queues(struct ena_adapter *adapter)
1463 {
1464         uint16_t ena_qid;
1465         int i;
1466
1467         for (i = 0; i < adapter->num_io_queues; i++) {
1468                 ena_qid = ENA_IO_TXQ_IDX(i);
1469                 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1470         }
1471 }
1472
1473 static void
1474 ena_destroy_all_rx_queues(struct ena_adapter *adapter)
1475 {
1476         uint16_t ena_qid;
1477         int i;
1478
1479         for (i = 0; i < adapter->num_io_queues; i++) {
1480                 ena_qid = ENA_IO_RXQ_IDX(i);
1481                 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1482         }
1483 }
1484
1485 static void
1486 ena_destroy_all_io_queues(struct ena_adapter *adapter)
1487 {
1488         struct ena_que *queue;
1489         int i;
1490
1491         for (i = 0; i < adapter->num_io_queues; i++) {
1492                 queue = &adapter->que[i];
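                /*
                 * taskqueue_cancel(9) returns non-zero when the task is
                 * currently running and therefore cannot be cancelled;
                 * drain it in that case and retry until the task is
                 * neither queued nor executing.
                 */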
1493                 while (taskqueue_cancel(queue->cleanup_tq, &queue->cleanup_task, NULL))
1494                         taskqueue_drain(queue->cleanup_tq, &queue->cleanup_task);
1495                 taskqueue_free(queue->cleanup_tq);
1496         }
1497
1498         ena_destroy_all_tx_queues(adapter);
1499         ena_destroy_all_rx_queues(adapter);
1500 }
1501
1502 static int
1503 ena_create_io_queues(struct ena_adapter *adapter)
1504 {
1505         struct ena_com_dev *ena_dev = adapter->ena_dev;
1506         struct ena_com_create_io_ctx ctx;
1507         struct ena_ring *ring;
1508         struct ena_que *queue;
1509         uint16_t ena_qid;
1510         uint32_t msix_vector;
1511         cpuset_t *cpu_mask = NULL;
1512         int rc, i;
1513
1514         /* Create TX queues */
1515         for (i = 0; i < adapter->num_io_queues; i++) {
1516                 msix_vector = ENA_IO_IRQ_IDX(i);
1517                 ena_qid = ENA_IO_TXQ_IDX(i);
1518                 ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1519                 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1520                 ctx.queue_size = adapter->requested_tx_ring_size;
1521                 ctx.msix_vector = msix_vector;
1522                 ctx.qid = ena_qid;
1523                 ctx.numa_node = adapter->que[i].domain;
1524
1525                 rc = ena_com_create_io_queue(ena_dev, &ctx);
1526                 if (rc != 0) {
1527                         ena_log(adapter->pdev, ERR,
1528                             "Failed to create io TX queue #%d rc: %d\n", i, rc);
1529                         goto err_tx;
1530                 }
1531                 ring = &adapter->tx_ring[i];
1532                 rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1533                     &ring->ena_com_io_sq, &ring->ena_com_io_cq);
1534                 if (rc != 0) {
1535                         ena_log(adapter->pdev, ERR,
1536                             "Failed to get TX queue handlers. TX queue num"
1537                             " %d rc: %d\n",
1538                             i, rc);
1539                         ena_com_destroy_io_queue(ena_dev, ena_qid);
1540                         goto err_tx;
1541                 }
1542
1543                 if (ctx.numa_node >= 0) {
1544                         ena_com_update_numa_node(ring->ena_com_io_cq,
1545                             ctx.numa_node);
1546                 }
1547         }
1548
1549         /* Create RX queues */
1550         for (i = 0; i < adapter->num_io_queues; i++) {
1551                 msix_vector = ENA_IO_IRQ_IDX(i);
1552                 ena_qid = ENA_IO_RXQ_IDX(i);
1553                 ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1554                 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1555                 ctx.queue_size = adapter->requested_rx_ring_size;
1556                 ctx.msix_vector = msix_vector;
1557                 ctx.qid = ena_qid;
1558                 ctx.numa_node = adapter->que[i].domain;
1559
1560                 rc = ena_com_create_io_queue(ena_dev, &ctx);
1561                 if (unlikely(rc != 0)) {
1562                         ena_log(adapter->pdev, ERR,
1563                             "Failed to create io RX queue[%d] rc: %d\n", i, rc);
1564                         goto err_rx;
1565                 }
1566
1567                 ring = &adapter->rx_ring[i];
1568                 rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1569                     &ring->ena_com_io_sq, &ring->ena_com_io_cq);
1570                 if (unlikely(rc != 0)) {
1571                         ena_log(adapter->pdev, ERR,
1572                             "Failed to get RX queue handlers. RX queue num"
1573                             " %d rc: %d\n",
1574                             i, rc);
1575                         ena_com_destroy_io_queue(ena_dev, ena_qid);
1576                         goto err_rx;
1577                 }
1578
1579                 if (ctx.numa_node >= 0) {
1580                         ena_com_update_numa_node(ring->ena_com_io_cq,
1581                             ctx.numa_node);
1582                 }
1583         }
1584
1585         for (i = 0; i < adapter->num_io_queues; i++) {
1586                 queue = &adapter->que[i];
1587
1588                 NET_TASK_INIT(&queue->cleanup_task, 0, ena_cleanup, queue);
1589                 queue->cleanup_tq = taskqueue_create_fast("ena cleanup",
1590                     M_WAITOK, taskqueue_thread_enqueue, &queue->cleanup_tq);
1591
1592 #ifdef RSS
1593                 cpu_mask = &queue->cpu_mask;
1594 #endif
1595                 taskqueue_start_threads_cpuset(&queue->cleanup_tq, 1, PI_NET,
1596                     cpu_mask, "%s queue %d cleanup",
1597                     device_get_nameunit(adapter->pdev), i);
1598         }
1599
1600         return (0);
1601
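/*
 * Error unwind: destroy the RX queues created so far, then reset i so the
 * fall-through into err_tx also destroys every TX queue from the first loop.
 */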
1602 err_rx:
1603         while (i--)
1604                 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
1605         i = adapter->num_io_queues;
1606 err_tx:
1607         while (i--)
1608                 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1609
1610         return (ENXIO);
1611 }
1612
1613 /*********************************************************************
1614  *
1615  *  MSIX & Interrupt Service routine
1616  *
1617  **********************************************************************/
1618
1619 /**
1620  * ena_intr_msix_mgmnt - MSI-X Interrupt Handler for the admin/async queue
1621  * @arg: pointer to the ena_adapter structure
1622  **/
1623 static void
1624 ena_intr_msix_mgmnt(void *arg)
1625 {
1626         struct ena_adapter *adapter = (struct ena_adapter *)arg;
1627
1628         ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1629         if (likely(ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)))
1630                 ena_com_aenq_intr_handler(adapter->ena_dev, arg);
1631 }
1632
1633 /**
1634  * ena_handle_msix - MSI-X Interrupt Handler for Tx/Rx
1635  * @arg: pointer to the ena_que structure
1636  **/
1637 static int
1638 ena_handle_msix(void *arg)
1639 {
1640         struct ena_que *queue = arg;
1641         struct ena_adapter *adapter = queue->adapter;
1642         if_t ifp = adapter->ifp;
1643
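        /*
         * This routine is registered as an interrupt filter, so no
         * sleeping is allowed here; the actual Tx/Rx cleanup is deferred
         * to the per-queue taskqueue and the interrupt is reported as
         * handled.
         */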
1644         if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
1645                 return (FILTER_STRAY);
1646
1647         taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task);
1648
1649         return (FILTER_HANDLED);
1650 }
1651
1652 static int
1653 ena_enable_msix(struct ena_adapter *adapter)
1654 {
1655         device_t dev = adapter->pdev;
1656         int msix_vecs, msix_req;
1657         int i, rc = 0;
1658
1659         if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
1660                 ena_log(dev, ERR, "Error, MSI-X is already enabled\n");
1661                 return (EINVAL);
1662         }
1663
1664         /* Reserve the maximum number of MSI-X vectors we might need */
1665         msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
1666
1667         adapter->msix_entries = malloc(msix_vecs * sizeof(struct msix_entry),
1668             M_DEVBUF, M_WAITOK | M_ZERO);
1669
1670         ena_log(dev, DBG, "trying to enable MSI-X, vectors: %d\n", msix_vecs);
1671
1672         for (i = 0; i < msix_vecs; i++) {
1673                 adapter->msix_entries[i].entry = i;
1674                 /* Vectors must start from 1 */
1675                 adapter->msix_entries[i].vector = i + 1;
1676         }
1677
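        /*
         * pci_alloc_msix(9) may grant fewer vectors than requested and
         * updates msix_vecs in place, so remember the requested count for
         * the comparison below.
         */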
1678         msix_req = msix_vecs;
1679         rc = pci_alloc_msix(dev, &msix_vecs);
1680         if (unlikely(rc != 0)) {
1681                 ena_log(dev, ERR, "Failed to enable MSI-X, vectors %d rc %d\n",
1682                     msix_vecs, rc);
1683
1684                 rc = ENOSPC;
1685                 goto err_msix_free;
1686         }
1687
1688         if (msix_vecs != msix_req) {
1689                 if (msix_vecs == ENA_ADMIN_MSIX_VEC) {
1690                         ena_log(dev, ERR,
1691                             "Not enough MSI-X vectors allocated: %d\n",
1692                             msix_vecs);
1693                         pci_release_msi(dev);
1694                         rc = ENOSPC;
1695                         goto err_msix_free;
1696                 }
1697                 ena_log(dev, ERR,
1698                     "Enabled only %d MSI-X vectors (out of %d requested); "
1699                     "reduce the number of queues\n",
1700                     msix_vecs, msix_req);
1701         }
1702
1703         adapter->msix_vecs = msix_vecs;
1704         ENA_FLAG_SET_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
1705
1706         return (0);
1707
1708 err_msix_free:
1709         free(adapter->msix_entries, M_DEVBUF);
1710         adapter->msix_entries = NULL;
1711
1712         return (rc);
1713 }
1714
1715 static void
1716 ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1717 {
1718         snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name, ENA_IRQNAME_SIZE,
1719             "ena-mgmnt@pci:%s", device_get_nameunit(adapter->pdev));
1720         /*
1721          * Handler is NULL on purpose; it will be set
1722          * when the mgmnt interrupt is acquired.
1723          */
1724         adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL;
1725         adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1726         adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1727             adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
1728 }
1729
1730 static int
1731 ena_setup_io_intr(struct ena_adapter *adapter)
1732 {
1733 #ifdef RSS
1734         int num_buckets = rss_getnumbuckets();
1735         static int last_bind = 0;
1736         int cur_bind;
1737         int idx;
1738 #endif
1739         int irq_idx;
1740
1741         if (adapter->msix_entries == NULL)
1742                 return (EINVAL);
1743
1744 #ifdef RSS
1745         if (adapter->first_bind < 0) {
1746                 adapter->first_bind = last_bind;
1747                 last_bind = (last_bind + adapter->num_io_queues) % num_buckets;
1748         }
1749         cur_bind = adapter->first_bind;
1750 #endif
1751
1752         for (int i = 0; i < adapter->num_io_queues; i++) {
1753                 irq_idx = ENA_IO_IRQ_IDX(i);
1754
1755                 snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1756                     "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i);
1757                 adapter->irq_tbl[irq_idx].handler = ena_handle_msix;
1758                 adapter->irq_tbl[irq_idx].data = &adapter->que[i];
1759                 adapter->irq_tbl[irq_idx].vector =
1760                     adapter->msix_entries[irq_idx].vector;
1761                 ena_log(adapter->pdev, DBG, "ena_setup_io_intr vector: %d\n",
1762                     adapter->msix_entries[irq_idx].vector);
1763
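                /*
                 * IRQ spreading: when a base CPU was configured (via the
                 * spreading sysctls), assign queue i to CPU
                 * (irq_cpu_base + i * irq_cpu_stride) % mp_ncpus,
                 * e.g. base 0 with stride 2 on 8 CPUs maps queues 0..3
                 * to CPUs 0, 2, 4, 6 (illustrative figures).
                 */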
1764                 if (adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) {
1765                         adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
1766                             (unsigned)(adapter->irq_cpu_base +
1767                             i * adapter->irq_cpu_stride) % (unsigned)mp_ncpus;
1768                         CPU_SETOF(adapter->que[i].cpu, &adapter->que[i].cpu_mask);
1769                 }
1770
1771 #ifdef RSS
1772                 adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
1773                     rss_getcpu(cur_bind);
1774                 cur_bind = (cur_bind + 1) % num_buckets;
1775                 CPU_SETOF(adapter->que[i].cpu, &adapter->que[i].cpu_mask);
1776
1777                 for (idx = 0; idx < MAXMEMDOM; ++idx) {
1778                         if (CPU_ISSET(adapter->que[i].cpu, &cpuset_domain[idx]))
1779                                 break;
1780                 }
1781                 adapter->que[i].domain = idx;
1782 #else
1783                 adapter->que[i].domain = -1;
1784 #endif
1785         }
1786
1787         return (0);
1788 }
1789
1790 static int
1791 ena_request_mgmnt_irq(struct ena_adapter *adapter)
1792 {
1793         device_t pdev = adapter->pdev;
1794         struct ena_irq *irq;
1795         unsigned long flags;
1796         int rc, rcc;
1797
1798         flags = RF_ACTIVE | RF_SHAREABLE;
1799
1800         irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1801         irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1802             &irq->vector, flags);
1803
1804         if (unlikely(irq->res == NULL)) {
1805                 ena_log(pdev, ERR, "could not allocate irq vector: %d\n",
1806                     irq->vector);
1807                 return (ENXIO);
1808         }
1809
1810         rc = bus_setup_intr(adapter->pdev, irq->res,
1811             INTR_TYPE_NET | INTR_MPSAFE, NULL, ena_intr_msix_mgmnt, irq->data,
1812             &irq->cookie);
1813         if (unlikely(rc != 0)) {
1814                 ena_log(pdev, ERR,
1815                     "failed to register interrupt handler for irq %ju: %d\n",
1816                     rman_get_start(irq->res), rc);
1817                 goto err_res_free;
1818         }
1819         irq->requested = true;
1820
1821         return (rc);
1822
1823 err_res_free:
1824         ena_log(pdev, INFO, "releasing resource for irq %d\n", irq->vector);
1825         rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, irq->vector,
1826             irq->res);
1827         if (unlikely(rcc != 0))
1828                 ena_log(pdev, ERR,
1829                     "dev has no parent while releasing res for irq: %d\n",
1830                     irq->vector);
1831         irq->res = NULL;
1832
1833         return (rc);
1834 }
1835
1836 static int
1837 ena_request_io_irq(struct ena_adapter *adapter)
1838 {
1839         device_t pdev = adapter->pdev;
1840         struct ena_irq *irq;
1841         unsigned long flags = 0;
1842         int rc = 0, i, rcc;
1843
1844         if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter))) {
1845                 ena_log(pdev, ERR,
1846                     "failed to request I/O IRQ: MSI-X is not enabled\n");
1847                 return (EINVAL);
1848         } else {
1849                 flags = RF_ACTIVE | RF_SHAREABLE;
1850         }
1851
1852         for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1853                 irq = &adapter->irq_tbl[i];
1854
1855                 if (unlikely(irq->requested))
1856                         continue;
1857
1858                 irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1859                     &irq->vector, flags);
1860                 if (unlikely(irq->res == NULL)) {
1861                         rc = ENOMEM;
1862                         ena_log(pdev, ERR,
1863                             "could not allocate irq vector: %d\n", irq->vector);
1864                         goto err;
1865                 }
1866
1867                 rc = bus_setup_intr(adapter->pdev, irq->res,
1868                     INTR_TYPE_NET | INTR_MPSAFE, irq->handler, NULL, irq->data,
1869                     &irq->cookie);
1870                 if (unlikely(rc != 0)) {
1871                         ena_log(pdev, ERR,
1872                             "failed to register interrupt handler for irq %ju: %d\n",
1873                             rman_get_start(irq->res), rc);
1874                         goto err;
1875                 }
1876                 irq->requested = true;
1877
1878                 if (adapter->rss_enabled || adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) {
1879                         rc = bus_bind_intr(adapter->pdev, irq->res, irq->cpu);
1880                         if (unlikely(rc != 0)) {
1881                                 ena_log(pdev, ERR,
1882                                     "failed to bind interrupt handler for irq %ju to cpu %d: %d\n",
1883                                     rman_get_start(irq->res), irq->cpu, rc);
1884                                 goto err;
1885                         }
1886
1887                         ena_log(pdev, INFO, "queue %d - cpu %d\n",
1888                             i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
1889                 }
1890         }
1891         return (rc);
1892
1893 err:
1894
1895         for (; i >= ENA_IO_IRQ_FIRST_IDX; i--) {
1896                 irq = &adapter->irq_tbl[i];
1897                 rcc = 0;
1898
1899                 /* Once we have entered the err: section with irq->requested
1900                    set, free both the interrupt and its resource. */
1901                 if (irq->requested) {
1902                         rcc = bus_teardown_intr(adapter->pdev, irq->res,
1903                             irq->cookie);
1904                         if (unlikely(rcc != 0))
1905                                 ena_log(pdev, ERR,
1906                                     "could not release irq: %d, error: %d\n",
1907                                     irq->vector, rcc);
1908                 }
1909
1910                 /* If we entered the err: section without irq->requested set,
1911                    only bus_alloc_resource_any() needs cleanup, provided res
1912                    is not NULL. If res is NULL, no work is needed in this
1913                    iteration. */
1914                 rcc = 0;
1915                 if (irq->res != NULL) {
1916                         rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1917                             irq->vector, irq->res);
1918                 }
1919                 if (unlikely(rcc != 0))
1920                         ena_log(pdev, ERR,
1921                             "dev has no parent while releasing res for irq: %d\n",
1922                             irq->vector);
1923                 irq->requested = false;
1924                 irq->res = NULL;
1925         }
1926
1927         return (rc);
1928 }
1929
1930 static void
1931 ena_free_mgmnt_irq(struct ena_adapter *adapter)
1932 {
1933         device_t pdev = adapter->pdev;
1934         struct ena_irq *irq;
1935         int rc;
1936
1937         irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1938         if (irq->requested) {
1939                 ena_log(pdev, DBG, "tear down irq: %d\n", irq->vector);
1940                 rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
1941                 if (unlikely(rc != 0))
1942                         ena_log(pdev, ERR, "failed to tear down irq: %d\n",
1943                             irq->vector);
1944                 irq->requested = 0;
1945         }
1946
1947         if (irq->res != NULL) {
1948                 ena_log(pdev, DBG, "release resource irq: %d\n", irq->vector);
1949                 rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1950                     irq->vector, irq->res);
1951                 irq->res = NULL;
1952                 if (unlikely(rc != 0))
1953                         ena_log(pdev, ERR,
1954                             "dev has no parent while releasing res for irq: %d\n",
1955                             irq->vector);
1956         }
1957 }
1958
1959 static void
1960 ena_free_io_irq(struct ena_adapter *adapter)
1961 {
1962         device_t pdev = adapter->pdev;
1963         struct ena_irq *irq;
1964         int rc;
1965
1966         for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1967                 irq = &adapter->irq_tbl[i];
1968                 if (irq->requested) {
1969                         ena_log(pdev, DBG, "tear down irq: %d\n", irq->vector);
1970                         rc = bus_teardown_intr(adapter->pdev, irq->res,
1971                             irq->cookie);
1972                         if (unlikely(rc != 0)) {
1973                                 ena_log(pdev, ERR,
1974                                     "failed to tear down irq: %d\n",
1975                                     irq->vector);
1976                         }
1977                         irq->requested = 0;
1978                 }
1979
1980                 if (irq->res != NULL) {
1981                         ena_log(pdev, DBG, "release resource irq: %d\n",
1982                             irq->vector);
1983                         rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1984                             irq->vector, irq->res);
1985                         irq->res = NULL;
1986                         if (unlikely(rc != 0)) {
1987                                 ena_log(pdev, ERR,
1988                                     "dev has no parent while releasing res for irq: %d\n",
1989                                     irq->vector);
1990                         }
1991                 }
1992         }
1993 }
1994
1995 static void
1996 ena_free_irqs(struct ena_adapter *adapter)
1997 {
1998         ena_free_io_irq(adapter);
1999         ena_free_mgmnt_irq(adapter);
2000         ena_disable_msix(adapter);
2001 }
2002
2003 static void
2004 ena_disable_msix(struct ena_adapter *adapter)
2005 {
2006         if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
2007                 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
2008                 pci_release_msi(adapter->pdev);
2009         }
2010
2011         adapter->msix_vecs = 0;
2012         free(adapter->msix_entries, M_DEVBUF);
2013         adapter->msix_entries = NULL;
2014 }
2015
2016 static void
2017 ena_unmask_all_io_irqs(struct ena_adapter *adapter)
2018 {
2019         struct ena_com_io_cq *io_cq;
2020         struct ena_eth_io_intr_reg intr_reg;
2021         struct ena_ring *tx_ring;
2022         uint16_t ena_qid;
2023         int i;
2024
2025         /* Unmask interrupts for all queues */
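        /*
         * ena_com_update_intr_reg() is used here with zero Rx/Tx
         * moderation delay and the unmask flag set, so each queue can
         * raise an interrupt as soon as the device has work for it.
         */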
2026         for (i = 0; i < adapter->num_io_queues; i++) {
2027                 ena_qid = ENA_IO_TXQ_IDX(i);
2028                 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
2029                 ena_com_update_intr_reg(&intr_reg, 0, 0, true);
2030                 tx_ring = &adapter->tx_ring[i];
2031                 counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1);
2032                 ena_com_unmask_intr(io_cq, &intr_reg);
2033         }
2034 }
2035
2036 static int
2037 ena_up_complete(struct ena_adapter *adapter)
2038 {
2039         int rc;
2040
2041         if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
2042                 rc = ena_rss_configure(adapter);
2043                 if (rc != 0) {
2044                         ena_log(adapter->pdev, ERR,
2045                             "Failed to configure RSS\n");
2046                         return (rc);
2047                 }
2048         }
2049
2050         rc = ena_change_mtu(adapter->ifp, if_getmtu(adapter->ifp));
2051         if (unlikely(rc != 0))
2052                 return (rc);
2053
2054         ena_refill_all_rx_bufs(adapter);
2055         ena_reset_counters((counter_u64_t *)&adapter->hw_stats,
2056             sizeof(adapter->hw_stats));
2057
2058         return (0);
2059 }
2060
2061 static void
2062 set_io_rings_size(struct ena_adapter *adapter, int new_tx_size, int new_rx_size)
2063 {
2064         int i;
2065
2066         for (i = 0; i < adapter->num_io_queues; i++) {
2067                 adapter->tx_ring[i].ring_size = new_tx_size;
2068                 adapter->rx_ring[i].ring_size = new_rx_size;
2069         }
2070 }
2071
2072 static int
2073 create_queues_with_size_backoff(struct ena_adapter *adapter)
2074 {
2075         device_t pdev = adapter->pdev;
2076         int rc;
2077         uint32_t cur_rx_ring_size, cur_tx_ring_size;
2078         uint32_t new_rx_ring_size, new_tx_ring_size;
2079
2080         /*
2081          * Current queue sizes might be smaller than the requested ones
2082          * due to past queue allocation failures.
2083          */
2084         set_io_rings_size(adapter, adapter->requested_tx_ring_size,
2085             adapter->requested_rx_ring_size);
2086
2087         while (1) {
2088                 /* Allocate transmit descriptors */
2089                 rc = ena_setup_all_tx_resources(adapter);
2090                 if (unlikely(rc != 0)) {
2091                         ena_log(pdev, ERR, "err_setup_tx\n");
2092                         goto err_setup_tx;
2093                 }
2094
2095                 /* Allocate receive descriptors */
2096                 rc = ena_setup_all_rx_resources(adapter);
2097                 if (unlikely(rc != 0)) {
2098                         ena_log(pdev, ERR, "err_setup_rx\n");
2099                         goto err_setup_rx;
2100                 }
2101
2102                 /* Create IO queues for Rx & Tx */
2103                 rc = ena_create_io_queues(adapter);
2104                 if (unlikely(rc != 0)) {
2105                         ena_log(pdev, ERR, "create IO queues failed\n");
2106                         goto err_io_que;
2107                 }
2108
2109                 return (0);
2110
2111 err_io_que:
2112                 ena_free_all_rx_resources(adapter);
2113 err_setup_rx:
2114                 ena_free_all_tx_resources(adapter);
2115 err_setup_tx:
2116                 /*
2117                  * Lower the ring size if ENOMEM. Otherwise, return the
2118                  * error straightaway.
2119                  */
2120                 if (unlikely(rc != ENOMEM)) {
2121                         ena_log(pdev, ERR,
2122                             "Queue creation failed with error code: %d\n", rc);
2123                         return (rc);
2124                 }
2125
2126                 cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2127                 cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2128
2129                 ena_log(pdev, ERR,
2130                     "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2131                     cur_tx_ring_size, cur_rx_ring_size);
2132
2133                 new_tx_ring_size = cur_tx_ring_size;
2134                 new_rx_ring_size = cur_rx_ring_size;
2135
2136                 /*
2137                  * Decrease the size of a larger queue, or decrease both if they
2138                  * are the same size.
2139                  */
2140                 if (cur_rx_ring_size <= cur_tx_ring_size)
2141                         new_tx_ring_size = cur_tx_ring_size / 2;
2142                 if (cur_rx_ring_size >= cur_tx_ring_size)
2143                         new_rx_ring_size = cur_rx_ring_size / 2;
2144
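                /*
                 * Illustrative backoff: TX=1024/RX=512 first becomes
                 * TX=512/RX=512, then TX=256/RX=256, and so on, until the
                 * allocation succeeds or ENA_MIN_RING_SIZE is reached.
                 */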
2145                 if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2146                     new_rx_ring_size < ENA_MIN_RING_SIZE) {
2147                         ena_log(pdev, ERR,
2148                             "Queue creation failed with the smallest possible queue size "
2149                             "of %d for both queues. Not retrying with smaller queues\n",
2150                             ENA_MIN_RING_SIZE);
2151                         return (rc);
2152                 }
2153
2154                 ena_log(pdev, INFO,
2155                     "Retrying queue creation with sizes TX=%d, RX=%d\n",
2156                     new_tx_ring_size, new_rx_ring_size);
2157
2158                 set_io_rings_size(adapter, new_tx_ring_size, new_rx_ring_size);
2159         }
2160 }
2161
2162 int
2163 ena_up(struct ena_adapter *adapter)
2164 {
2165         int rc = 0;
2166
2167         ENA_LOCK_ASSERT();
2168
2169         if (unlikely(device_is_attached(adapter->pdev) == 0)) {
2170                 ena_log(adapter->pdev, ERR, "device is not attached!\n");
2171                 return (ENXIO);
2172         }
2173
2174         if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
2175                 return (0);
2176
2177         ena_log(adapter->pdev, INFO, "device is going UP\n");
2178
2179         /* setup interrupts for IO queues */
2180         rc = ena_setup_io_intr(adapter);
2181         if (unlikely(rc != 0)) {
2182                 ena_log(adapter->pdev, ERR, "error setting up IO interrupt\n");
2183                 goto error;
2184         }
2185         rc = ena_request_io_irq(adapter);
2186         if (unlikely(rc != 0)) {
2187                 ena_log(adapter->pdev, ERR, "err_req_irq\n");
2188                 goto error;
2189         }
2190
2191         ena_log(adapter->pdev, INFO,
2192             "Creating %u IO queues. Rx queue size: %d, Tx queue size: %d, LLQ is %s\n",
2193             adapter->num_io_queues,
2194             adapter->requested_rx_ring_size,
2195             adapter->requested_tx_ring_size,
2196             (adapter->ena_dev->tx_mem_queue_type ==
2197                 ENA_ADMIN_PLACEMENT_POLICY_DEV) ? "ENABLED" : "DISABLED");
2198
2199         rc = create_queues_with_size_backoff(adapter);
2200         if (unlikely(rc != 0)) {
2201                 ena_log(adapter->pdev, ERR,
2202                     "error creating queues with size backoff\n");
2203                 goto err_create_queues_with_backoff;
2204         }
2205
2206         if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
2207                 if_link_state_change(adapter->ifp, LINK_STATE_UP);
2208
2209         rc = ena_up_complete(adapter);
2210         if (unlikely(rc != 0))
2211                 goto err_up_complete;
2212
2213         counter_u64_add(adapter->dev_stats.interface_up, 1);
2214
2215         ena_update_hwassist(adapter);
2216
2217         if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
2218
2219         ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2220
2221         ena_unmask_all_io_irqs(adapter);
2222
2223         return (0);
2224
2225 err_up_complete:
2226         ena_destroy_all_io_queues(adapter);
2227         ena_free_all_rx_resources(adapter);
2228         ena_free_all_tx_resources(adapter);
2229 err_create_queues_with_backoff:
2230         ena_free_io_irq(adapter);
2231 error:
2232         return (rc);
2233 }
2234
2235 static uint64_t
2236 ena_get_counter(if_t ifp, ift_counter cnt)
2237 {
2238         struct ena_adapter *adapter;
2239         struct ena_hw_stats *stats;
2240
2241         adapter = if_getsoftc(ifp);
2242         stats = &adapter->hw_stats;
2243
2244         switch (cnt) {
2245         case IFCOUNTER_IPACKETS:
2246                 return (counter_u64_fetch(stats->rx_packets));
2247         case IFCOUNTER_OPACKETS:
2248                 return (counter_u64_fetch(stats->tx_packets));
2249         case IFCOUNTER_IBYTES:
2250                 return (counter_u64_fetch(stats->rx_bytes));
2251         case IFCOUNTER_OBYTES:
2252                 return (counter_u64_fetch(stats->tx_bytes));
2253         case IFCOUNTER_IQDROPS:
2254                 return (counter_u64_fetch(stats->rx_drops));
2255         case IFCOUNTER_OQDROPS:
2256                 return (counter_u64_fetch(stats->tx_drops));
2257         default:
2258                 return (if_get_counter_default(ifp, cnt));
2259         }
2260 }
2261
2262 static int
2263 ena_media_change(if_t ifp)
2264 {
2265         /* Media Change is not supported by firmware */
2266         return (0);
2267 }
2268
2269 static void
2270 ena_media_status(if_t ifp, struct ifmediareq *ifmr)
2271 {
2272         struct ena_adapter *adapter = if_getsoftc(ifp);
2273         ena_log(adapter->pdev, DBG, "Media status update\n");
2274
2275         ENA_LOCK_LOCK();
2276
2277         ifmr->ifm_status = IFM_AVALID;
2278         ifmr->ifm_active = IFM_ETHER;
2279
2280         if (!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) {
2281                 ENA_LOCK_UNLOCK();
2282                 ena_log(adapter->pdev, INFO, "Link is down\n");
2283                 return;
2284         }
2285
2286         ifmr->ifm_status |= IFM_ACTIVE;
2287         ifmr->ifm_active |= IFM_UNKNOWN | IFM_FDX;
2288
2289         ENA_LOCK_UNLOCK();
2290 }
2291
2292 static void
2293 ena_init(void *arg)
2294 {
2295         struct ena_adapter *adapter = (struct ena_adapter *)arg;
2296
2297         if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) {
2298                 ENA_LOCK_LOCK();
2299                 ena_up(adapter);
2300                 ENA_LOCK_UNLOCK();
2301         }
2302 }
2303
2304 static int
2305 ena_ioctl(if_t ifp, u_long command, caddr_t data)
2306 {
2307         struct ena_adapter *adapter;
2308         struct ifreq *ifr;
2309         int rc;
2310
2311         adapter = if_getsoftc(ifp);
2312         ifr = (struct ifreq *)data;
2313
2314         /*
2315          * Acquire the lock to keep the up and down routines from running in parallel.
2316          */
2317         rc = 0;
2318         switch (command) {
2319         case SIOCSIFMTU:
2320                 if (if_getmtu(ifp) == ifr->ifr_mtu)
2321                         break;
2322                 ENA_LOCK_LOCK();
2323                 ena_down(adapter);
2324
2325                 ena_change_mtu(ifp, ifr->ifr_mtu);
2326
2327                 rc = ena_up(adapter);
2328                 ENA_LOCK_UNLOCK();
2329                 break;
2330
2331         case SIOCSIFFLAGS:
2332                 if ((if_getflags(ifp) & IFF_UP) != 0) {
2333                         if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2334                                 if ((if_getflags(ifp) & (IFF_PROMISC |
2335                                     IFF_ALLMULTI)) != 0) {
2336                                         ena_log(adapter->pdev, INFO,
2337                                             "ioctl promisc/allmulti\n");
2338                                 }
2339                         } else {
2340                                 ENA_LOCK_LOCK();
2341                                 rc = ena_up(adapter);
2342                                 ENA_LOCK_UNLOCK();
2343                         }
2344                 } else {
2345                         if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2346                                 ENA_LOCK_LOCK();
2347                                 ena_down(adapter);
2348                                 ENA_LOCK_UNLOCK();
2349                         }
2350                 }
2351                 break;
2352
2353         case SIOCADDMULTI:
2354         case SIOCDELMULTI:
2355                 break;
2356
2357         case SIOCSIFMEDIA:
2358         case SIOCGIFMEDIA:
2359                 rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
2360                 break;
2361
2362         case SIOCSIFCAP:
2363                 {
2364                         int reinit = 0;
2365
2366                         if (ifr->ifr_reqcap != if_getcapenable(ifp)) {
2367                                 if_setcapenable(ifp, ifr->ifr_reqcap);
2368                                 reinit = 1;
2369                         }
2370
2371                         if ((reinit != 0) &&
2372                             ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)) {
2373                                 ENA_LOCK_LOCK();
2374                                 ena_down(adapter);
2375                                 rc = ena_up(adapter);
2376                                 ENA_LOCK_UNLOCK();
2377                         }
2378                 }
2379
2380                 break;
2381         default:
2382                 rc = ether_ioctl(ifp, command, data);
2383                 break;
2384         }
2385
2386         return (rc);
2387 }
2388
2389 static int
2390 ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat)
2391 {
2392         int caps = 0;
2393
2394         if ((feat->offload.tx &
2395             (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2396             ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK |
2397             ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0)
2398                 caps |= IFCAP_TXCSUM;
2399
2400         if ((feat->offload.tx &
2401             (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK |
2402             ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) != 0)
2403                 caps |= IFCAP_TXCSUM_IPV6;
2404
2405         if ((feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0)
2406                 caps |= IFCAP_TSO4;
2407
2408         if ((feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) != 0)
2409                 caps |= IFCAP_TSO6;
2410
2411         if ((feat->offload.rx_supported &
2412             (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK |
2413             ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) != 0)
2414                 caps |= IFCAP_RXCSUM;
2415
2416         if ((feat->offload.rx_supported &
2417             ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) != 0)
2418                 caps |= IFCAP_RXCSUM_IPV6;
2419
2420         caps |= IFCAP_LRO | IFCAP_JUMBO_MTU;
2421
2422         return (caps);
2423 }
2424
2425 static void
2426 ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp)
2427 {
2428         host_info->supported_network_features[0] = (uint32_t)if_getcapabilities(ifp);
2429 }
2430
2431 static void
2432 ena_update_hwassist(struct ena_adapter *adapter)
2433 {
2434         if_t ifp = adapter->ifp;
2435         uint32_t feat = adapter->tx_offload_cap;
2436         int cap = if_getcapenable(ifp);
2437         int flags = 0;
2438
2439         if_clearhwassist(ifp);
2440
2441         if ((cap & IFCAP_TXCSUM) != 0) {
2442                 if ((feat &
2443                     ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) != 0)
2444                         flags |= CSUM_IP;
2445                 if ((feat &
2446                     (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2447                     ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) != 0)
2448                         flags |= CSUM_IP_UDP | CSUM_IP_TCP;
2449         }
2450
2451         if ((cap & IFCAP_TXCSUM_IPV6) != 0)
2452                 flags |= CSUM_IP6_UDP | CSUM_IP6_TCP;
2453
2454         if ((cap & IFCAP_TSO4) != 0)
2455                 flags |= CSUM_IP_TSO;
2456
2457         if ((cap & IFCAP_TSO6) != 0)
2458                 flags |= CSUM_IP6_TSO;
2459
2460         if_sethwassistbits(ifp, flags, 0);
2461 }
2462
2463 static int
2464 ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter,
2465     struct ena_com_dev_get_features_ctx *feat)
2466 {
2467         if_t ifp;
2468         int caps = 0;
2469
2470         ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2471         if (unlikely(ifp == NULL)) {
2472                 ena_log(pdev, ERR, "can not allocate ifnet structure\n");
2473                 return (ENXIO);
2474         }
2475         if_initname(ifp, device_get_name(pdev), device_get_unit(pdev));
2476         if_setdev(ifp, pdev);
2477         if_setsoftc(ifp, adapter);
2478
2479         if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2480         if_setinitfn(ifp, ena_init);
2481         if_settransmitfn(ifp, ena_mq_start);
2482         if_setqflushfn(ifp, ena_qflush);
2483         if_setioctlfn(ifp, ena_ioctl);
2484         if_setgetcounterfn(ifp, ena_get_counter);
2485
2486         if_setsendqlen(ifp, adapter->requested_tx_ring_size);
2487         if_setsendqready(ifp);
2488         if_setmtu(ifp, ETHERMTU);
2489         if_setbaudrate(ifp, 0);
2490         /* Zeroize capabilities... */
2491         if_setcapabilities(ifp, 0);
2492         if_setcapenable(ifp, 0);
2493         /* check hardware support */
2494         caps = ena_get_dev_offloads(feat);
2495         /* ... and set them */
2496         if_setcapabilitiesbit(ifp, caps, 0);
2497
2498         /* TSO parameters */
2499         if_sethwtsomax(ifp, ENA_TSO_MAXSIZE -
2500             (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
2501         if_sethwtsomaxsegcount(ifp, adapter->max_tx_sgl_size - 1);
2502         if_sethwtsomaxsegsize(ifp, ENA_TSO_MAXSIZE);
2503
2504         if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2505         if_setcapenable(ifp, if_getcapabilities(ifp));
2506
2507         /*
2508          * Specify the media types supported by this adapter and register
2509          * callbacks to update media and link information
2510          */
2511         ifmedia_init(&adapter->media, IFM_IMASK, ena_media_change,
2512             ena_media_status);
2513         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2514         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2515
2516         ether_ifattach(ifp, adapter->mac_addr);
2517
2518         return (0);
2519 }
2520
2521 void
2522 ena_down(struct ena_adapter *adapter)
2523 {
2524         int rc;
2525
2526         ENA_LOCK_ASSERT();
2527
2528         if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
2529                 return;
2530
2531         ena_log(adapter->pdev, INFO, "device is going DOWN\n");
2532
2533         ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2534         if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2535
2536         ena_free_io_irq(adapter);
2537
2538         if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) {
2539                 rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
2540                 if (unlikely(rc != 0))
2541                         ena_log(adapter->pdev, ERR, "Device reset failed\n");
2542         }
2543
2544         ena_destroy_all_io_queues(adapter);
2545
2546         ena_free_all_tx_bufs(adapter);
2547         ena_free_all_rx_bufs(adapter);
2548         ena_free_all_tx_resources(adapter);
2549         ena_free_all_rx_resources(adapter);
2550
2551         counter_u64_add(adapter->dev_stats.interface_down, 1);
2552 }
2553
2554 static uint32_t
2555 ena_calc_max_io_queue_num(device_t pdev, struct ena_com_dev *ena_dev,
2556     struct ena_com_dev_get_features_ctx *get_feat_ctx)
2557 {
2558         uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
2559
2560         /* Regular queues capabilities */
2561         if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2562                 struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2563                     &get_feat_ctx->max_queue_ext.max_queue_ext;
2564                 io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num,
2565                     max_queue_ext->max_rx_cq_num);
2566
2567                 io_tx_sq_num = max_queue_ext->max_tx_sq_num;
2568                 io_tx_cq_num = max_queue_ext->max_tx_cq_num;
2569         } else {
2570                 struct ena_admin_queue_feature_desc *max_queues =
2571                     &get_feat_ctx->max_queues;
2572                 io_tx_sq_num = max_queues->max_sq_num;
2573                 io_tx_cq_num = max_queues->max_cq_num;
2574                 io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num);
2575         }
2576
2577         /* In the LLQ case, use the llq fields for the TX SQ/CQ */
2578         if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2579                 io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
2580
2581         max_num_io_queues = min_t(uint32_t, mp_ncpus, ENA_MAX_NUM_IO_QUEUES);
2582         max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_rx_num);
2583         max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_sq_num);
2584         max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_cq_num);
2585         /* 1 IRQ for mgmnt and 1 IRQ for each TX/RX pair */
2586         max_num_io_queues = min_t(uint32_t, max_num_io_queues,
2587             pci_msix_count(pdev) - 1);
2588 #ifdef RSS
2589         max_num_io_queues = min_t(uint32_t, max_num_io_queues,
2590             rss_getnumbuckets());
2591 #endif
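        /*
         * Illustrative example (assumed figures): an instance with 8 CPUs
         * exposing 9 MSI-X vectors ends up with min(8, 9 - 1) = 8 IO
         * queue pairs, subject to the device SQ/CQ limits computed above.
         */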
2592
2593         return (max_num_io_queues);
2594 }
2595
2596 static int
2597 ena_enable_wc(device_t pdev, struct resource *res)
2598 {
2599 #if defined(__i386) || defined(__amd64) || defined(__aarch64__)
2600         vm_offset_t va;
2601         vm_size_t len;
2602         int rc;
2603
2604         va = (vm_offset_t)rman_get_virtual(res);
2605         len = rman_get_size(res);
2606         /* Enable write combining */
2607         rc = pmap_change_attr(va, len, VM_MEMATTR_WRITE_COMBINING);
2608         if (unlikely(rc != 0)) {
2609                 ena_log(pdev, ERR, "pmap_change_attr failed, %d\n", rc);
2610                 return (rc);
2611         }
2612
2613         return (0);
2614 #endif
2615         return (EOPNOTSUPP);
2616 }
2617
2618 static int
2619 ena_set_queues_placement_policy(device_t pdev, struct ena_com_dev *ena_dev,
2620     struct ena_admin_feature_llq_desc *llq,
2621     struct ena_llq_configurations *llq_default_configurations)
2622 {
2623         int rc;
2624         uint32_t llq_feature_mask;
2625
2626         llq_feature_mask = 1 << ENA_ADMIN_LLQ;
2627         if (!(ena_dev->supported_features & llq_feature_mask)) {
2628                 ena_log(pdev, WARN,
2629                     "LLQ is not supported. Fallback to host mode policy.\n");
2630                 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2631                 return (0);
2632         }
2633
2634         if (ena_dev->mem_bar == NULL) {
2635                 ena_log(pdev, WARN,
2636                     "LLQ is advertised as supported but device doesn't expose mem bar.\n");
2637                 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2638                 return (0);
2639         }
2640
2641         rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
2642         if (unlikely(rc != 0)) {
2643                 ena_log(pdev, WARN,
2644                     "Failed to configure the device mode. "
2645                     "Fallback to host mode policy.\n");
2646                 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2647         }
2648
2649         return (0);
2650 }
2651
2652 static int
2653 ena_map_llq_mem_bar(device_t pdev, struct ena_com_dev *ena_dev)
2654 {
2655         struct ena_adapter *adapter = device_get_softc(pdev);
2656         int rc, rid;
2657
2658         /* Try to allocate resources for LLQ bar */
2659         rid = PCIR_BAR(ENA_MEM_BAR);
2660         adapter->memory = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, &rid,
2661             RF_ACTIVE);
2662         if (unlikely(adapter->memory == NULL)) {
2663                 ena_log(pdev, WARN,
2664                     "Unable to allocate LLQ bar resource. LLQ mode won't be used.\n");
2665                 return (0);
2666         }
2667
2668         /* Enable write combining for better LLQ performance */
2669         rc = ena_enable_wc(adapter->pdev, adapter->memory);
2670         if (unlikely(rc != 0)) {
2671                 ena_log(pdev, ERR, "failed to enable write combining.\n");
2672                 return (rc);
2673         }
2674
2675         /*
2676          * Save virtual address of the device's memory region
2677          * for the ena_com layer.
2678          */
2679         ena_dev->mem_bar = rman_get_virtual(adapter->memory);
2680
2681         return (0);
2682 }
2683
2684 static inline void
2685 set_default_llq_configurations(struct ena_llq_configurations *llq_config,
2686     struct ena_admin_feature_llq_desc *llq)
2687 {
2688         llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
2689         llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
2690         llq_config->llq_num_decs_before_header =
2691             ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
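        /*
         * Use 256B LLQ entries only when the device supports them and
         * large LLQ headers were explicitly requested; the larger entry
         * presumably leaves more room for inlining packet headers.
         */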
2692         if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) !=
2693             0 && ena_force_large_llq_header) {
2694                 llq_config->llq_ring_entry_size =
2695                     ENA_ADMIN_LIST_ENTRY_SIZE_256B;
2696                 llq_config->llq_ring_entry_size_value = 256;
2697         } else {
2698                 llq_config->llq_ring_entry_size =
2699                     ENA_ADMIN_LIST_ENTRY_SIZE_128B;
2700                 llq_config->llq_ring_entry_size_value = 128;
2701         }
2702 }
2703
2704 static int
2705 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
2706 {
2707         struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
2708         struct ena_com_dev *ena_dev = ctx->ena_dev;
2709         uint32_t tx_queue_size = ENA_DEFAULT_RING_SIZE;
2710         uint32_t rx_queue_size = ENA_DEFAULT_RING_SIZE;
2711         uint32_t max_tx_queue_size;
2712         uint32_t max_rx_queue_size;
2713
2714         if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2715                 struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2716                     &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
2717                 max_rx_queue_size = min_t(uint32_t,
2718                     max_queue_ext->max_rx_cq_depth,
2719                     max_queue_ext->max_rx_sq_depth);
2720                 max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
2721
2722                 if (ena_dev->tx_mem_queue_type ==
2723                     ENA_ADMIN_PLACEMENT_POLICY_DEV)
2724                         max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2725                             llq->max_llq_depth);
2726                 else
2727                         max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2728                             max_queue_ext->max_tx_sq_depth);
2729
2730                 ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2731                     max_queue_ext->max_per_packet_tx_descs);
2732                 ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2733                     max_queue_ext->max_per_packet_rx_descs);
2734         } else {
2735                 struct ena_admin_queue_feature_desc *max_queues =
2736                     &ctx->get_feat_ctx->max_queues;
2737                 max_rx_queue_size = min_t(uint32_t, max_queues->max_cq_depth,
2738                     max_queues->max_sq_depth);
2739                 max_tx_queue_size = max_queues->max_cq_depth;
2740
2741                 if (ena_dev->tx_mem_queue_type ==
2742                     ENA_ADMIN_PLACEMENT_POLICY_DEV)
2743                         max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2744                             llq->max_llq_depth);
2745                 else
2746                         max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2747                             max_queues->max_sq_depth);
2748
2749                 ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2750                     max_queues->max_packet_tx_descs);
2751                 ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2752                     max_queues->max_packet_rx_descs);
2753         }
2754
2755         /* round down to the nearest power of 2 */
2756         max_tx_queue_size = 1 << (flsl(max_tx_queue_size) - 1);
2757         max_rx_queue_size = 1 << (flsl(max_rx_queue_size) - 1);
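        /* e.g. a device-reported maximum of 1000 rounds down to 512 (2^9). */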
2758
2759         /*
2760          * When forcing large headers, we multiply the entry size by 2,
2761          * and therefore divide the queue size by 2, leaving the amount
2762          * of memory used by the queues unchanged.
2763          */
2764         if (ena_force_large_llq_header) {
2765                 if ((llq->entry_size_ctrl_supported &
2766                     ENA_ADMIN_LIST_ENTRY_SIZE_256B) != 0 &&
2767                     ena_dev->tx_mem_queue_type ==
2768                     ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2769                         max_tx_queue_size /= 2;
2770                         ena_log(ctx->pdev, INFO,
2771                             "Forcing large headers and decreasing maximum Tx queue size to %d\n",
2772                             max_tx_queue_size);
2773                 } else {
2774                         ena_log(ctx->pdev, WARN,
2775                             "Forcing large headers failed: LLQ is disabled or device does not support large headers\n");
2776                 }
2777         }
2778
2779         tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
2780             max_tx_queue_size);
2781         rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
2782             max_rx_queue_size);
2783
2784         tx_queue_size = 1 << (flsl(tx_queue_size) - 1);
2785         rx_queue_size = 1 << (flsl(rx_queue_size) - 1);
2786
2787         ctx->max_tx_queue_size = max_tx_queue_size;
2788         ctx->max_rx_queue_size = max_rx_queue_size;
2789         ctx->tx_queue_size = tx_queue_size;
2790         ctx->rx_queue_size = rx_queue_size;
2791
2792         return (0);
2793 }
2794
2795 static void
2796 ena_config_host_info(struct ena_com_dev *ena_dev, device_t dev)
2797 {
2798         struct ena_admin_host_info *host_info;
2799         uintptr_t rid;
2800         int rc;
2801
2802         /* Allocate only the host info */
2803         rc = ena_com_allocate_host_info(ena_dev);
2804         if (unlikely(rc != 0)) {
2805                 ena_log(dev, ERR, "Cannot allocate host info\n");
2806                 return;
2807         }
2808
2809         host_info = ena_dev->host_attr.host_info;
2810
2811         if (pci_get_id(dev, PCI_ID_RID, &rid) == 0)
2812                 host_info->bdf = rid;
2813         host_info->os_type = ENA_ADMIN_OS_FREEBSD;
2814         host_info->kernel_ver = osreldate;
2815
2816         sprintf(host_info->kernel_ver_str, "%d", osreldate);
2817         host_info->os_dist = 0;
2818         strncpy(host_info->os_dist_str, osrelease,
2819             sizeof(host_info->os_dist_str) - 1);
2820
2821         host_info->driver_version = (ENA_DRV_MODULE_VER_MAJOR) |
2822             (ENA_DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
2823             (ENA_DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
2824         host_info->num_cpus = mp_ncpus;
2825         host_info->driver_supported_features =
2826             ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
2827             ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;
2828
2829         rc = ena_com_set_host_attributes(ena_dev);
2830         if (unlikely(rc != 0)) {
2831                 if (rc == EOPNOTSUPP)
2832                         ena_log(dev, WARN, "Cannot set host attributes\n");
2833                 else
2834                         ena_log(dev, ERR, "Cannot set host attributes\n");
2835
2836                 goto err;
2837         }
2838
2839         return;
2840
2841 err:
2842         ena_com_delete_host_info(ena_dev);
2843 }
2844
2845 static int
2846 ena_device_init(struct ena_adapter *adapter, device_t pdev,
2847     struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active)
2848 {
2849         struct ena_llq_configurations llq_config;
2850         struct ena_com_dev *ena_dev = adapter->ena_dev;
2851         bool readless_supported;
2852         uint32_t aenq_groups;
2853         int dma_width;
2854         int rc;
2855
2856         rc = ena_com_mmio_reg_read_request_init(ena_dev);
2857         if (unlikely(rc != 0)) {
2858                 ena_log(pdev, ERR, "failed to init mmio read less\n");
2859                 return (rc);
2860         }
2861
2862         /*
2863          * The PCIe configuration space revision ID indicates whether MMIO
2864          * register read is disabled.
2865          */
2866         readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ);
2867         ena_com_set_mmio_read_mode(ena_dev, readless_supported);
2868
2869         rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
2870         if (unlikely(rc != 0)) {
2871                 ena_log(pdev, ERR, "Can not reset device\n");
2872                 goto err_mmio_read_less;
2873         }
2874
2875         rc = ena_com_validate_version(ena_dev);
2876         if (unlikely(rc != 0)) {
2877                 ena_log(pdev, ERR, "device version is too low\n");
2878                 goto err_mmio_read_less;
2879         }
2880
2881         dma_width = ena_com_get_dma_width(ena_dev);
2882         if (unlikely(dma_width < 0)) {
2883                 ena_log(pdev, ERR, "Invalid dma width value %d\n", dma_width);
2884                 rc = dma_width;
2885                 goto err_mmio_read_less;
2886         }
2887         adapter->dma_width = dma_width;
2888
2889         /* ENA admin level init */
2890         rc = ena_com_admin_init(ena_dev, &aenq_handlers);
2891         if (unlikely(rc != 0)) {
2892                 ena_log(pdev, ERR,
2893                     "Can not initialize ena admin queue with device\n");
2894                 goto err_mmio_read_less;
2895         }
2896
2897         /*
2898          * To enable the MSI-X interrupts the driver needs to know the
2899          * number of queues, so it uses polling mode to retrieve this
2900          * information.
2901          */
2902         ena_com_set_admin_polling_mode(ena_dev, true);
2903
2904         ena_config_host_info(ena_dev, pdev);
2905
2906         /* Get Device Attributes */
2907         rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
2908         if (unlikely(rc != 0)) {
2909                 ena_log(pdev, ERR,
2910                     "Cannot get attribute for ena device rc: %d\n", rc);
2911                 goto err_admin_init;
2912         }
2913
2914         aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
2915             BIT(ENA_ADMIN_FATAL_ERROR) |
2916             BIT(ENA_ADMIN_WARNING) |
2917             BIT(ENA_ADMIN_NOTIFICATION) |
2918             BIT(ENA_ADMIN_KEEP_ALIVE);
2919
2920         aenq_groups &= get_feat_ctx->aenq.supported_groups;
2921         rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
2922         if (unlikely(rc != 0)) {
2923                 ena_log(pdev, ERR, "Cannot configure aenq groups rc: %d\n", rc);
2924                 goto err_admin_init;
2925         }
2926
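             /*
              * The keep alive watchdog can only be armed if the device
              * actually advertises the KEEP_ALIVE AENQ group; !! normalizes
              * the bit test to 0 or 1.
              */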
2927         *wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
2928
2929         set_default_llq_configurations(&llq_config, &get_feat_ctx->llq);
2930
2931         rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
2932             &llq_config);
2933         if (unlikely(rc != 0)) {
2934                 ena_log(pdev, ERR, "Failed to set placement policy\n");
2935                 goto err_admin_init;
2936         }
2937
2938         return (0);
2939
2940 err_admin_init:
2941         ena_com_delete_host_info(ena_dev);
2942         ena_com_admin_destroy(ena_dev);
2943 err_mmio_read_less:
2944         ena_com_mmio_reg_read_request_destroy(ena_dev);
2945
2946         return (rc);
2947 }
2948
2949 static int
2950 ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
2951 {
2952         struct ena_com_dev *ena_dev = adapter->ena_dev;
2953         int rc;
2954
2955         rc = ena_enable_msix(adapter);
2956         if (unlikely(rc != 0)) {
2957                 ena_log(adapter->pdev, ERR, "Error with MSI-X enablement\n");
2958                 return (rc);
2959         }
2960
2961         ena_setup_mgmnt_intr(adapter);
2962
2963         rc = ena_request_mgmnt_irq(adapter);
2964         if (unlikely(rc != 0)) {
2965                 ena_log(adapter->pdev, ERR, "Cannot setup mgmnt queue intr\n");
2966                 goto err_disable_msix;
2967         }
2968
2969         ena_com_set_admin_polling_mode(ena_dev, false);
2970
2971         ena_com_admin_aenq_enable(ena_dev);
2972
2973         return (0);
2974
2975 err_disable_msix:
2976         ena_disable_msix(adapter);
2977
2978         return (rc);
2979 }
2980
2981 /* Function called on ENA_ADMIN_KEEP_ALIVE event */
2982 static void
2983 ena_keep_alive_wd(void *adapter_data, struct ena_admin_aenq_entry *aenq_e)
2984 {
2985         struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
2986         struct ena_admin_aenq_keep_alive_desc *desc;
2987         sbintime_t stime;
2988         uint64_t rx_drops;
2989         uint64_t tx_drops;
2990
2991         desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
2992
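             /*
              * The device reports absolute drop totals split into 32-bit
              * halves. Rebuild the 64-bit values and replace the counters
              * with a zero-then-add, as counter(9) has no "set" primitive.
              */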
2993         rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
2994         tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
2995         counter_u64_zero(adapter->hw_stats.rx_drops);
2996         counter_u64_add(adapter->hw_stats.rx_drops, rx_drops);
2997         counter_u64_zero(adapter->hw_stats.tx_drops);
2998         counter_u64_add(adapter->hw_stats.tx_drops, tx_drops);
2999
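             /*
              * The release store pairs with the acquire load in
              * check_for_missing_keep_alive(), so the timer service always
              * observes a fully written timestamp.
              */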
3000         stime = getsbinuptime();
3001         atomic_store_rel_64(&adapter->keep_alive_timestamp, stime);
3002 }
3003
3004 /* Check for keep alive expiration */
3005 static void
3006 check_for_missing_keep_alive(struct ena_adapter *adapter)
3007 {
3008         sbintime_t timestamp, time;
3009
3010         if (adapter->wd_active == 0)
3011                 return;
3012
3013         if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3014                 return;
3015
3016         timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp);
3017         time = getsbinuptime() - timestamp;
3018         if (unlikely(time > adapter->keep_alive_timeout)) {
3019                 ena_log(adapter->pdev, ERR, "Keep alive watchdog timeout.\n");
3020                 counter_u64_add(adapter->dev_stats.wd_expired, 1);
3021                 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
3022         }
3023 }
3024
3025 /* Check if admin queue is enabled */
3026 static void
3027 check_for_admin_com_state(struct ena_adapter *adapter)
3028 {
3029         if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) == false)) {
3030                 ena_log(adapter->pdev, ERR,
3031                     "ENA admin queue is not in running state!\n");
3032                 counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
3033                 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO);
3034         }
3035 }
3036
3037 static int
3038 check_for_rx_interrupt_queue(struct ena_adapter *adapter,
3039     struct ena_ring *rx_ring)
3040 {
3041         if (likely(atomic_load_8(&rx_ring->first_interrupt)))
3042                 return (0);
3043
3044         if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
3045                 return (0);
3046
3047         rx_ring->no_interrupt_event_cnt++;
3048
3049         if (rx_ring->no_interrupt_event_cnt ==
3050             ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3051                 ena_log(adapter->pdev, ERR,
3052                     "Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
3053                     rx_ring->qid);
3054                 ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
3055                 return (EIO);
3056         }
3057
3058         return (0);
3059 }
3060
3061 static int
3062 check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3063     struct ena_ring *tx_ring)
3064 {
3065         device_t pdev = adapter->pdev;
3066         struct bintime curtime, time;
3067         struct ena_tx_buffer *tx_buf;
3068         int time_since_last_cleanup;
3069         int missing_tx_comp_to;
3070         sbintime_t time_offset;
3071         uint32_t missed_tx = 0;
3072         int i, rc = 0;
3073
3074         getbinuptime(&curtime);
3075
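             /*
              * Walk the whole ring. Every buffer whose timestamp is still
              * set was handed to the device but has not completed yet;
              * measure how long it has been in flight.
              */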
3076         for (i = 0; i < tx_ring->ring_size; i++) {
3077                 tx_buf = &tx_ring->tx_buffer_info[i];
3078
3079                 if (bintime_isset(&tx_buf->timestamp) == 0)
3080                         continue;
3081
3082                 time = curtime;
3083                 bintime_sub(&time, &tx_buf->timestamp);
3084                 time_offset = bttosbt(time);
3085
3086                 if (unlikely(!atomic_load_8(&tx_ring->first_interrupt) &&
3087                     time_offset > 2 * adapter->missing_tx_timeout)) {
3088                         /*
3089                          * If the interrupt is still not received after the
3090                          * grace period, schedule a reset.
3091                          */
3092                         ena_log(pdev, ERR,
3093                             "Potential MSIX issue on Tx side Queue = %d. "
3094                             "Reset the device\n",
3095                             tx_ring->qid);
3096                         ena_trigger_reset(adapter,
3097                             ENA_REGS_RESET_MISS_INTERRUPT);
3098                         return (EIO);
3099                 }
3100
3101                 /* Check again if packet is still waiting */
3102                 if (unlikely(time_offset > adapter->missing_tx_timeout)) {
3103
3104                         if (tx_buf->print_once) {
3105                                 time_since_last_cleanup = TICKS_2_USEC(ticks -
3106                                     tx_ring->tx_last_cleanup_ticks);
3107                                 missing_tx_comp_to = sbttoms(
3108                                     adapter->missing_tx_timeout);
3109                                 ena_log(pdev, WARN,
3110                                     "Found a Tx that wasn't completed on time, qid %d, index %d. "
3111                                     "%d usecs have passed since last cleanup. Missing Tx timeout value %d msecs.\n",
3112                                     tx_ring->qid, i, time_since_last_cleanup,
3113                                     missing_tx_comp_to);
3114                         }
3115
3116                         tx_buf->print_once = false;
3117                         missed_tx++;
3118                 }
3119         }
3120
3121         if (unlikely(missed_tx > adapter->missing_tx_threshold)) {
3122                 ena_log(pdev, ERR,
3123                     "The number of lost tx completions is above the threshold "
3124                     "(%d > %d). Reset the device\n",
3125                     missed_tx, adapter->missing_tx_threshold);
3126                 ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
3127                 rc = EIO;
3128         }
3129
3130         counter_u64_add(tx_ring->tx_stats.missing_tx_comp, missed_tx);
3131
3132         return (rc);
3133 }
3134
3135 /*
3136  * Check for Tx transactions which were not completed on time.
3137  * The timeout is defined by "missing_tx_timeout".
3138  * A reset is performed if the number of uncompleted
3139  * transactions exceeds "missing_tx_threshold".
3140  */
3141 static void
3142 check_for_missing_completions(struct ena_adapter *adapter)
3143 {
3144         struct ena_ring *tx_ring;
3145         struct ena_ring *rx_ring;
3146         int i, budget, rc;
3147
3148         /* Make sure the driver doesn't turn the device off in another process */
3149         rmb();
3150
3151         if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3152                 return;
3153
3154         if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3155                 return;
3156
3157         if (adapter->missing_tx_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3158                 return;
3159
3160         budget = adapter->missing_tx_max_queues;
3161
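             /*
              * Queues are scanned round-robin, at most "budget" of them per
              * timer tick. For example, with 8 IO queues and a budget of 4,
              * queues 0-3 are checked on one tick and 4-7 on the next.
              */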
3162         for (i = adapter->next_monitored_tx_qid; i < adapter->num_io_queues; i++) {
3163                 tx_ring = &adapter->tx_ring[i];
3164                 rx_ring = &adapter->rx_ring[i];
3165
3166                 rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3167                 if (unlikely(rc != 0))
3168                         return;
3169
3170                 rc = check_for_rx_interrupt_queue(adapter, rx_ring);
3171                 if (unlikely(rc != 0))
3172                         return;
3173
3174                 budget--;
3175                 if (budget == 0) {
3176                         i++;
3177                         break;
3178                 }
3179         }
3180
3181         adapter->next_monitored_tx_qid = i % adapter->num_io_queues;
3182 }
3183
3184 /* trigger rx cleanup after 2 consecutive detections */
3185 #define EMPTY_RX_REFILL 2
3186 /* For the rare case where the device runs out of Rx descriptors and the
3187  * MSI-X handler failed to refill new Rx descriptors (e.g. due to a lack
3188  * of memory). This case will lead to a deadlock:
3189  * the device won't send interrupts since all the new Rx packets will be
3190  * dropped, and the MSI-X handler won't allocate new Rx descriptors, so the
3191  * device won't be able to send new packets.
3192  *
3193  * When such a situation is detected, execute the rx cleanup task in another
3194  * thread.
3195  */
3196 static void
3197 check_for_empty_rx_ring(struct ena_adapter *adapter)
3198 {
3199         struct ena_ring *rx_ring;
3200         int i, refill_required;
3201
3202         if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3203                 return;
3204
3205         if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3206                 return;
3207
3208         for (i = 0; i < adapter->num_io_queues; i++) {
3209                 rx_ring = &adapter->rx_ring[i];
3210
3211                 refill_required = ena_com_free_q_entries(
3212                     rx_ring->ena_com_io_sq);
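                     /*
                      * ring_size - 1 free entries means the ring holds no
                      * buffers at all (one slot is conventionally kept
                      * unused), i.e. the device has nowhere to place
                      * incoming packets.
                      */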
3213                 if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3214                         rx_ring->empty_rx_queue++;
3215
3216                         if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
3217                                 counter_u64_add(rx_ring->rx_stats.empty_rx_ring,
3218                                     1);
3219
3220                                 ena_log(adapter->pdev, WARN,
3221                                     "Rx ring %d is stalled. Triggering the refill function\n",
3222                                     i);
3223
3224                                 taskqueue_enqueue(rx_ring->que->cleanup_tq,
3225                                     &rx_ring->que->cleanup_task);
3226                                 rx_ring->empty_rx_queue = 0;
3227                         }
3228                 } else {
3229                         rx_ring->empty_rx_queue = 0;
3230                 }
3231         }
3232 }
3233
3234 static void
3235 ena_update_hints(struct ena_adapter *adapter,
3236     struct ena_admin_ena_hw_hints *hints)
3237 {
3238         struct ena_com_dev *ena_dev = adapter->ena_dev;
3239
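             /*
              * Hint values are provided by the device in milliseconds, while
              * the admin completion and MMIO read timeouts are stored in
              * microseconds, hence the * 1000 conversions below.
              */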
3240         if (hints->admin_completion_tx_timeout)
3241                 ena_dev->admin_queue.completion_timeout =
3242                     hints->admin_completion_tx_timeout * 1000;
3243
3244         if (hints->mmio_read_timeout)
3245                 /* convert to usec */
3246                 ena_dev->mmio_read.reg_read_to = hints->mmio_read_timeout * 1000;
3247
3248         if (hints->missed_tx_completion_count_threshold_to_reset)
3249                 adapter->missing_tx_threshold =
3250                     hints->missed_tx_completion_count_threshold_to_reset;
3251
3252         if (hints->missing_tx_completion_timeout) {
3253                 if (hints->missing_tx_completion_timeout ==
3254                     ENA_HW_HINTS_NO_TIMEOUT)
3255                         adapter->missing_tx_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3256                 else
3257                         adapter->missing_tx_timeout = SBT_1MS *
3258                             hints->missing_tx_completion_timeout;
3259         }
3260
3261         if (hints->driver_watchdog_timeout) {
3262                 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3263                         adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3264                 else
3265                         adapter->keep_alive_timeout = SBT_1MS *
3266                             hints->driver_watchdog_timeout;
3267         }
3268 }
3269
3270 /**
3271  * ena_copy_eni_metrics - Get and copy ENI metrics from the HW.
3272  * @adapter: ENA device adapter
3273  *
3274  * Returns 0 on success, EOPNOTSUPP if current HW doesn't support those metrics
3275  * and other error codes on failure.
3276  *
3277  * This function can possibly cause a race with other calls to the admin queue.
3278  * Because of that, the caller should either lock this function or make sure
3279  * that there is no race in the current context.
3280  */
3281 static int
3282 ena_copy_eni_metrics(struct ena_adapter *adapter)
3283 {
3284         static bool print_once = true;
3285         int rc;
3286
3287         rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_metrics);
3288
3289         if (rc != 0) {
3290                 if (rc == ENA_COM_UNSUPPORTED) {
3291                         if (print_once) {
3292                                 ena_log(adapter->pdev, WARN,
3293                                     "Retrieving ENI metrics is not supported.\n");
3294                                 print_once = false;
3295                         } else {
3296                                 ena_log(adapter->pdev, DBG,
3297                                     "Retrieving ENI metrics is not supported.\n");
3298                         }
3299                 } else {
3300                         ena_log(adapter->pdev, ERR,
3301                             "Failed to get ENI metrics: %d\n", rc);
3302                 }
3303         }
3304
3305         return (rc);
3306 }
3307
3308 static void
3309 ena_timer_service(void *data)
3310 {
3311         struct ena_adapter *adapter = (struct ena_adapter *)data;
3312         struct ena_admin_host_info *host_info =
3313             adapter->ena_dev->host_attr.host_info;
3314
3315         check_for_missing_keep_alive(adapter);
3316
3317         check_for_admin_com_state(adapter);
3318
3319         check_for_missing_completions(adapter);
3320
3321         check_for_empty_rx_ring(adapter);
3322
3323         /*
3324          * User-controlled update of the ENI metrics.
3325          * If the interval was set to 0, the stats shouldn't be updated at
3326          * all. Otherwise, wait 'eni_metrics_sample_interval' seconds before
3327          * updating them. As the timer service is executed every second, it
3328          * is enough to increment the counter on each tick; e.g. an interval
3329          * of 5 enqueues the metrics task on every fifth execution of the
3330          * timer service.
3331          */
3332         if ((adapter->eni_metrics_sample_interval != 0) &&
3333             (++adapter->eni_metrics_sample_interval_cnt >=
3334              adapter->eni_metrics_sample_interval)) {
3335                 taskqueue_enqueue(adapter->metrics_tq, &adapter->metrics_task);
3336                 adapter->eni_metrics_sample_interval_cnt = 0;
3337         }
3338
3340         if (host_info != NULL)
3341                 ena_update_host_info(host_info, adapter->ifp);
3342
3343         if (unlikely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3344                 /*
3345                  * A timeout when validating the version indicates that the
3346                  * device became unresponsive. If that happens, skip the reset
3347                  * and reschedule the timer service to retry the reset later.
3348                  */
3349                 if (ena_com_validate_version(adapter->ena_dev) ==
3350                     ENA_COM_TIMER_EXPIRED) {
3351                         ena_log(adapter->pdev, WARN,
3352                             "FW unresponsive, skipping reset\n");
3353                         ENA_TIMER_RESET(adapter);
3354                         return;
3355                 }
3356                 ena_log(adapter->pdev, WARN, "Trigger reset is on\n");
3357                 taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task);
3358                 return;
3359         }
3360
3361         /*
3362          * Schedule another timeout one second from now.
3363          */
3364         ENA_TIMER_RESET(adapter);
3365 }
3366
3367 void
3368 ena_destroy_device(struct ena_adapter *adapter, bool graceful)
3369 {
3370         if_t ifp = adapter->ifp;
3371         struct ena_com_dev *ena_dev = adapter->ena_dev;
3372         bool dev_up;
3373
3374         if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))
3375                 return;
3376
3377         if (!graceful)
3378                 if_link_state_change(ifp, LINK_STATE_DOWN);
3379
3380         ENA_TIMER_DRAIN(adapter);
3381
3382         dev_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
3383         if (dev_up)
3384                 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
3385
3386         if (!graceful)
3387                 ena_com_set_admin_running_state(ena_dev, false);
3388
3389         if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3390                 ena_down(adapter);
3391
3392         /*
3393          * Stop the device from sending AENQ events (if the device was up, and
3394          * the trigger reset was on, ena_down already performs device reset)
3395          */
3396         if (!(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter) && dev_up))
3397                 ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
3398
3399         ena_free_mgmnt_irq(adapter);
3400
3401         ena_disable_msix(adapter);
3402
3403         /*
3404          * IO rings resources should be freed because `ena_restore_device()`
3405          * calls (indirectly) `ena_enable_msix()`, which re-allocates MSIX
3406          * vectors. The number of MSIX vectors after destroy-restore may be
3407          * different from before. Therefore, IO rings resources should be
3408          * established from scratch each time.
3409          */
3410         ena_free_all_io_rings_resources(adapter);
3411
3412         ena_com_abort_admin_commands(ena_dev);
3413
3414         ena_com_wait_for_abort_completion(ena_dev);
3415
3416         ena_com_admin_destroy(ena_dev);
3417
3418         ena_com_mmio_reg_read_request_destroy(ena_dev);
3419
3420         adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3421
3422         ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3423         ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3424 }
3425
3426 static int
3427 ena_device_validate_params(struct ena_adapter *adapter,
3428     struct ena_com_dev_get_features_ctx *get_feat_ctx)
3429 {
3430         if (memcmp(get_feat_ctx->dev_attr.mac_addr, adapter->mac_addr,
3431             ETHER_ADDR_LEN) != 0) {
3432                 ena_log(adapter->pdev, ERR, "Error, mac addresses differ\n");
3433                 return (EINVAL);
3434         }
3435
3436         if (get_feat_ctx->dev_attr.max_mtu < if_getmtu(adapter->ifp)) {
3437                 ena_log(adapter->pdev, ERR,
3438                     "Error, device max mtu is smaller than ifp MTU\n");
3439                 return (EINVAL);
3440         }
3441
3442         return (0);
3443 }
3444
3445 int
3446 ena_restore_device(struct ena_adapter *adapter)
3447 {
3448         struct ena_com_dev_get_features_ctx get_feat_ctx;
3449         struct ena_com_dev *ena_dev = adapter->ena_dev;
3450         if_t ifp = adapter->ifp;
3451         device_t dev = adapter->pdev;
3452         int wd_active;
3453         int rc;
3454
3455         ENA_FLAG_SET_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3456
3457         rc = ena_device_init(adapter, dev, &get_feat_ctx, &wd_active);
3458         if (rc != 0) {
3459                 ena_log(dev, ERR, "Cannot initialize device\n");
3460                 goto err;
3461         }
3462         /*
3463          * Only enable WD if it was enabled before the reset, so it won't
3464          * override a value set by the user via sysctl.
3465          */
3466         if (adapter->wd_active != 0)
3467                 adapter->wd_active = wd_active;
3468
3469         rc = ena_device_validate_params(adapter, &get_feat_ctx);
3470         if (rc != 0) {
3471                 ena_log(dev, ERR, "Validation of device parameters failed\n");
3472                 goto err_device_destroy;
3473         }
3474
3475         ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3476         /* Make sure we don't have a race with the AENQ link state handler */
3477         if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
3478                 if_link_state_change(ifp, LINK_STATE_UP);
3479
3480         rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3481         if (rc != 0) {
3482                 ena_log(dev, ERR, "Enable MSI-X failed\n");
3483                 goto err_device_destroy;
3484         }
3485
3486         /*
3487          * The effective number of used MSIX vectors should be the same as
3488          * before `ena_destroy_device()`, if possible, or closest to it if
3489          * fewer vectors are available.
3490          */
3491         if ((adapter->msix_vecs - ENA_ADMIN_MSIX_VEC) < adapter->num_io_queues)
3492                 adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;
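             /*
              * Example: if 8 IO queues were in use before the reset but only
              * 5 MSI-X vectors could be re-allocated, one vector stays
              * reserved for the admin queue and num_io_queues drops to 4.
              */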
3493
3494         /* Re-initialize rings basic information */
3495         ena_init_io_rings(adapter);
3496
3497         /* If the interface was up before the reset, bring it up */
3498         if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
3499                 rc = ena_up(adapter);
3500                 if (rc != 0) {
3501                         ena_log(dev, ERR, "Failed to create I/O queues\n");
3502                         goto err_disable_msix;
3503                 }
3504         }
3505
3506         /* Indicate that device is running again and ready to work */
3507         ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3508
3509         /*
3510          * As the AENQ handlers weren't executed during the reset because
3511          * the ENA_FLAG_DEVICE_RUNNING flag was turned off, the timestamp
3512          * must be updated again. That will prevent the next reset from
3513          * being caused by a missing keep alive.
3514          */
3515         adapter->keep_alive_timestamp = getsbinuptime();
3516         ENA_TIMER_RESET(adapter);
3517
3518         ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
3519
3520         return (rc);
3521
3522 err_disable_msix:
3523         ena_free_mgmnt_irq(adapter);
3524         ena_disable_msix(adapter);
3525 err_device_destroy:
3526         ena_com_abort_admin_commands(ena_dev);
3527         ena_com_wait_for_abort_completion(ena_dev);
3528         ena_com_admin_destroy(ena_dev);
3529         ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
3530         ena_com_mmio_reg_read_request_destroy(ena_dev);
3531 err:
3532         ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3533         ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3534         ena_log(dev, ERR, "Reset attempt failed. Can not reset the device\n");
3535
3536         return (rc);
3537 }
3538
3539 static void
3540 ena_metrics_task(void *arg, int pending)
3541 {
3542         struct ena_adapter *adapter = (struct ena_adapter *)arg;
3543
3544         ENA_LOCK_LOCK();
3545         (void)ena_copy_eni_metrics(adapter);
3546         ENA_LOCK_UNLOCK();
3547 }
3548
3549 static void
3550 ena_reset_task(void *arg, int pending)
3551 {
3552         struct ena_adapter *adapter = (struct ena_adapter *)arg;
3553
3554         ENA_LOCK_LOCK();
3555         if (likely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3556                 ena_destroy_device(adapter, false);
3557                 ena_restore_device(adapter);
3558
3559                 ena_log(adapter->pdev, INFO,
3560                     "Device reset completed successfully, Driver info: %s\n",
3561                     ena_version);
3562         }
3563         ENA_LOCK_UNLOCK();
3564 }
3565
3566 static void
3567 ena_free_stats(struct ena_adapter *adapter)
3568 {
3569         ena_free_counters((counter_u64_t *)&adapter->hw_stats,
3570             sizeof(struct ena_hw_stats));
3571         ena_free_counters((counter_u64_t *)&adapter->dev_stats,
3572             sizeof(struct ena_stats_dev));
3573 }
3574
3575 /**
3576  * ena_attach - Device Initialization Routine
3577  * @pdev: device information struct
3578  *
3579  * Returns 0 on success, otherwise on failure.
3580  * Returns 0 on success, or an error code on failure.
3581  * ena_attach initializes an adapter identified by a device structure.
3582  * The OS initialization, configuring of the adapter private structure,
3583  * and a hardware reset occur.
3584  **/
3585 static int
3586 ena_attach(device_t pdev)
3587 {
3588         struct ena_com_dev_get_features_ctx get_feat_ctx;
3589         struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
3590         static int version_printed;
3591         struct ena_adapter *adapter;
3592         struct ena_com_dev *ena_dev = NULL;
3593         uint32_t max_num_io_queues;
3594         int msix_rid;
3595         int rid, rc;
3596
3597         adapter = device_get_softc(pdev);
3598         adapter->pdev = pdev;
3599         adapter->first_bind = -1;
3600
3601         /*
3602          * Set up the timer service - driver is responsible for avoiding
3603          * concurrency, as the callout won't be using any locking inside.
3604          */
3605         ENA_TIMER_INIT(adapter);
3606         adapter->keep_alive_timeout = ENA_DEFAULT_KEEP_ALIVE_TO;
3607         adapter->missing_tx_timeout = ENA_DEFAULT_TX_CMP_TO;
3608         adapter->missing_tx_max_queues = ENA_DEFAULT_TX_MONITORED_QUEUES;
3609         adapter->missing_tx_threshold = ENA_DEFAULT_TX_CMP_THRESHOLD;
3610
3611         adapter->irq_cpu_base = ENA_BASE_CPU_UNSPECIFIED;
3612         adapter->irq_cpu_stride = 0;
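             /*
              * IRQ spreading is off by default. The base CPU and stride can
              * be overridden via the driver's sysctl interface (see
              * ena_sysctl.c); a stride of N then pins consecutive IO queue
              * vectors to every Nth CPU starting from the base.
              */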
3613
3614 #ifdef RSS
3615         adapter->rss_enabled = 1;
3616 #endif
3617
3618         if (version_printed++ == 0)
3619                 ena_log(pdev, INFO, "%s\n", ena_version);
3620
3621         /* Allocate memory for ena_dev structure */
3622         ena_dev = malloc(sizeof(struct ena_com_dev), M_DEVBUF,
3623             M_WAITOK | M_ZERO);
3624
3625         adapter->ena_dev = ena_dev;
3626         ena_dev->dmadev = pdev;
3627
3628         rid = PCIR_BAR(ENA_REG_BAR);
3629         adapter->memory = NULL;
3630         adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, &rid,
3631             RF_ACTIVE);
3632         if (unlikely(adapter->registers == NULL)) {
3633                 ena_log(pdev, ERR,
3634                     "unable to allocate bus resource: registers!\n");
3635                 rc = ENOMEM;
3636                 goto err_dev_free;
3637         }
3638
3639         /* MSIx vector table may reside on BAR0 with registers or on BAR1. */
3640         msix_rid = pci_msix_table_bar(pdev);
3641         if (msix_rid != rid) {
3642                 adapter->msix = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
3643                     &msix_rid, RF_ACTIVE);
3644                 if (unlikely(adapter->msix == NULL)) {
3645                         ena_log(pdev, ERR,
3646                             "unable to allocate bus resource: msix!\n");
3647                         rc = ENOMEM;
3648                         goto err_pci_free;
3649                 }
3650                 adapter->msix_rid = msix_rid;
3651         }
3652
3653         ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF,
3654             M_WAITOK | M_ZERO);
3655
3656         /* Store register resources */
3657         ((struct ena_bus *)(ena_dev->bus))->reg_bar_t = rman_get_bustag(
3658             adapter->registers);
3659         ((struct ena_bus *)(ena_dev->bus))->reg_bar_h = rman_get_bushandle(
3660             adapter->registers);
3661
3662         if (unlikely(((struct ena_bus *)(ena_dev->bus))->reg_bar_h == 0)) {
3663                 ena_log(pdev, ERR, "failed to map registers bar\n");
3664                 rc = ENXIO;
3665                 goto err_bus_free;
3666         }
3667
3668         rc = ena_map_llq_mem_bar(pdev, ena_dev);
3669         if (unlikely(rc != 0)) {
3670                 ena_log(pdev, ERR, "Failed to map ENA mem bar\n");
3671                 goto err_bus_free;
3672         }
3673
3674         /* Initially clear all the flags */
3675         ENA_FLAG_ZERO(adapter);
3676
3677         /* Device initialization */
3678         rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active);
3679         if (unlikely(rc != 0)) {
3680                 ena_log(pdev, ERR, "ENA device init failed! (err: %d)\n", rc);
3681                 rc = ENXIO;
3682                 goto err_bus_free;
3683         }
3684
3685         if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3686                 adapter->disable_meta_caching = !!(
3687                     get_feat_ctx.llq.accel_mode.u.get.supported_flags &
3688                     BIT(ENA_ADMIN_DISABLE_META_CACHING));
3689
3690         adapter->keep_alive_timestamp = getsbinuptime();
3691
3692         adapter->tx_offload_cap = get_feat_ctx.offload.tx;
3693
3694         memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr,
3695             ETHER_ADDR_LEN);
3696
3697         calc_queue_ctx.pdev = pdev;
3698         calc_queue_ctx.ena_dev = ena_dev;
3699         calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
3700
3701         /* Calculate initial and maximum IO queue number and size */
3702         max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev,
3703             &get_feat_ctx);
3704         rc = ena_calc_io_queue_size(&calc_queue_ctx);
3705         if (unlikely((rc != 0) || (max_num_io_queues <= 0))) {
3706                 rc = EFAULT;
3707                 goto err_com_free;
3708         }
3709
3710         adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
3711         adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
3712         adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
3713         adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
3714         adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
3715         adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
3716
3717         adapter->max_num_io_queues = max_num_io_queues;
3718
3719         adapter->buf_ring_size = ENA_DEFAULT_BUF_RING_SIZE;
3720
3721         adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;
3722
3723         adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3724
3725         /* set up dma tags for rx and tx buffers */
3726         rc = ena_setup_tx_dma_tag(adapter);
3727         if (unlikely(rc != 0)) {
3728                 ena_log(pdev, ERR, "Failed to create TX DMA tag\n");
3729                 goto err_com_free;
3730         }
3731
3732         rc = ena_setup_rx_dma_tag(adapter);
3733         if (unlikely(rc != 0)) {
3734                 ena_log(pdev, ERR, "Failed to create RX DMA tag\n");
3735                 goto err_tx_tag_free;
3736         }
3737
3738         /*
3739          * The number of requested MSIX vectors is equal to
3740          * adapter->max_num_io_queues (see `ena_enable_msix()`), plus a constant
3741          * number of admin queue interrupts. The former is initially determined
3742          * by HW capabilities (see `ena_calc_max_io_queue_num()`) but may not be
3743          * achieved if there are not enough system resources. By default, the
3744          * number of effectively used IO queues is the same, but later on it
3745          * can be limited by the user via the sysctl interface.
3746          */
3747         rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3748         if (unlikely(rc != 0)) {
3749                 ena_log(pdev, ERR,
3750                     "Failed to enable and set the admin interrupts\n");
3751                 goto err_io_free;
3752         }
3753         /* By default all of the allocated MSIX vectors are actively used */
3754         adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;
3755
3756         /* initialize rings basic information */
3757         ena_init_io_rings(adapter);
3758
3759         /* Initialize statistics */
3760         ena_alloc_counters((counter_u64_t *)&adapter->dev_stats,
3761             sizeof(struct ena_stats_dev));
3762         ena_alloc_counters((counter_u64_t *)&adapter->hw_stats,
3763             sizeof(struct ena_hw_stats));
3764         ena_sysctl_add_nodes(adapter);
3765
3766         /* setup network interface */
3767         rc = ena_setup_ifnet(pdev, adapter, &get_feat_ctx);
3768         if (unlikely(rc != 0)) {
3769                 ena_log(pdev, ERR, "Error with network interface setup\n");
3770                 goto err_msix_free;
3771         }
3772
3773         /* Initialize reset task queue */
3774         TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter);
3775         adapter->reset_tq = taskqueue_create("ena_reset_enqueue",
3776             M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq);
3777         taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET, "%s rstq",
3778             device_get_nameunit(adapter->pdev));
3779
3780         /* Initialize metrics task queue */
3781         TASK_INIT(&adapter->metrics_task, 0, ena_metrics_task, adapter);
3782         adapter->metrics_tq = taskqueue_create("ena_metrics_enqueue",
3783             M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->metrics_tq);
3784         taskqueue_start_threads(&adapter->metrics_tq, 1, PI_NET, "%s metricsq",
3785             device_get_nameunit(adapter->pdev));
3786
3787 #ifdef DEV_NETMAP
3788         rc = ena_netmap_attach(adapter);
3789         if (rc != 0) {
3790                 ena_log(pdev, ERR, "netmap attach failed: %d\n", rc);
3791                 goto err_detach;
3792         }
3793 #endif /* DEV_NETMAP */
3794
3795         /* Tell the stack that the interface is not active */
3796         if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
3797         ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3798
3799         /* Run the timer service */
3800         ENA_TIMER_RESET(adapter);
3801
3802         return (0);
3803
3804 #ifdef DEV_NETMAP
3805 err_detach:
3806         ether_ifdetach(adapter->ifp);
3807 #endif /* DEV_NETMAP */
3808 err_msix_free:
3809         ena_free_stats(adapter);
3810         ena_com_dev_reset(adapter->ena_dev, ENA_REGS_RESET_INIT_ERR);
3811         ena_free_mgmnt_irq(adapter);
3812         ena_disable_msix(adapter);
3813 err_io_free:
3814         ena_free_all_io_rings_resources(adapter);
3815         ena_free_rx_dma_tag(adapter);
3816 err_tx_tag_free:
3817         ena_free_tx_dma_tag(adapter);
3818 err_com_free:
3819         ena_com_admin_destroy(ena_dev);
3820         ena_com_delete_host_info(ena_dev);
3821         ena_com_mmio_reg_read_request_destroy(ena_dev);
3822 err_bus_free:
3823         free(ena_dev->bus, M_DEVBUF);
3824 err_pci_free:
3825         ena_free_pci_resources(adapter);
3826 err_dev_free:
3827         free(ena_dev, M_DEVBUF);
3828
3829         return (rc);
3830 }
3831
3832 /**
3833  * ena_detach - Device Removal Routine
3834  * @pdev: device information struct
3835  *
3836  * ena_detach is called by the device subsystem to alert the driver
3837  * that it should release a PCI device.
3838  **/
3839 static int
3840 ena_detach(device_t pdev)
3841 {
3842         struct ena_adapter *adapter = device_get_softc(pdev);
3843         struct ena_com_dev *ena_dev = adapter->ena_dev;
3844         int rc;
3845
3846         /* Make sure VLANs are not using the driver */
3847         if (if_vlantrunkinuse(adapter->ifp)) {
3848                 ena_log(adapter->pdev, ERR, "VLAN is in use, detach first\n");
3849                 return (EBUSY);
3850         }
3851
3852         ether_ifdetach(adapter->ifp);
3853
3854         /* Stop timer service */
3855         ENA_LOCK_LOCK();
3856         ENA_TIMER_DRAIN(adapter);
3857         ENA_LOCK_UNLOCK();
3858
3859         /* Release metrics task */
3860         while (taskqueue_cancel(adapter->metrics_tq, &adapter->metrics_task, NULL))
3861                 taskqueue_drain(adapter->metrics_tq, &adapter->metrics_task);
3862         taskqueue_free(adapter->metrics_tq);
3863
3864         /* Release reset task */
3865         while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL))
3866                 taskqueue_drain(adapter->reset_tq, &adapter->reset_task);
3867         taskqueue_free(adapter->reset_tq);
3868
3869         ENA_LOCK_LOCK();
3870         ena_down(adapter);
3871         ena_destroy_device(adapter, true);
3872         ENA_LOCK_UNLOCK();
3873
3874         /* Restore unregistered sysctl queue nodes. */
3875         ena_sysctl_update_queue_node_nb(adapter, adapter->num_io_queues,
3876             adapter->max_num_io_queues);
3877
3878 #ifdef DEV_NETMAP
3879         netmap_detach(adapter->ifp);
3880 #endif /* DEV_NETMAP */
3881
3882         ena_free_stats(adapter);
3883
3884         rc = ena_free_rx_dma_tag(adapter);
3885         if (unlikely(rc != 0))
3886                 ena_log(adapter->pdev, WARN,
3887                     "Unmapped RX DMA tag associations\n");
3888
3889         rc = ena_free_tx_dma_tag(adapter);
3890         if (unlikely(rc != 0))
3891                 ena_log(adapter->pdev, WARN,
3892                     "Unmapped TX DMA tag associations\n");
3893
3894         ena_free_irqs(adapter);
3895
3896         ena_free_pci_resources(adapter);
3897
3898         if (adapter->rss_indir != NULL)
3899                 free(adapter->rss_indir, M_DEVBUF);
3900
3901         if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter)))
3902                 ena_com_rss_destroy(ena_dev);
3903
3904         ena_com_delete_host_info(ena_dev);
3905
3906         if_free(adapter->ifp);
3907
3908         free(ena_dev->bus, M_DEVBUF);
3909
3910         free(ena_dev, M_DEVBUF);
3911
3912         return (bus_generic_detach(pdev));
3913 }
3914
3915 /******************************************************************************
3916  ******************************** AENQ Handlers *******************************
3917  *****************************************************************************/
3918 /**
3919  * ena_update_on_link_change:
3920  * Notify the network interface about the change in link status
3921  **/
3922 static void
3923 ena_update_on_link_change(void *adapter_data,
3924     struct ena_admin_aenq_entry *aenq_e)
3925 {
3926         struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3927         struct ena_admin_aenq_link_change_desc *aenq_desc;
3928         int status;
3929         if_t ifp;
3930
3931         aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
3932         ifp = adapter->ifp;
3933         status = aenq_desc->flags &
3934             ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
3935
3936         if (status != 0) {
3937                 ena_log(adapter->pdev, INFO, "link is UP\n");
3938                 ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter);
3939                 if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter))
3940                         if_link_state_change(ifp, LINK_STATE_UP);
3941         } else {
3942                 ena_log(adapter->pdev, INFO, "link is DOWN\n");
3943                 if_link_state_change(ifp, LINK_STATE_DOWN);
3944                 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter);
3945         }
3946 }
3947
3948 static void
3949 ena_notification(void *adapter_data, struct ena_admin_aenq_entry *aenq_e)
3950 {
3951         struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3952         struct ena_admin_ena_hw_hints *hints;
3953
3954         ENA_WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
3955             adapter->ena_dev, "Invalid group(%x) expected %x\n",
3956             aenq_e->aenq_common_desc.group, ENA_ADMIN_NOTIFICATION);
3957
3958         switch (aenq_e->aenq_common_desc.syndrome) {
3959         case ENA_ADMIN_UPDATE_HINTS:
3960                 hints =
3961                     (struct ena_admin_ena_hw_hints *)(&aenq_e->inline_data_w4);
3962                 ena_update_hints(adapter, hints);
3963                 break;
3964         default:
3965                 ena_log(adapter->pdev, ERR,
3966                     "Invalid aenq notification syndrome %d\n",
3967                     aenq_e->aenq_common_desc.syndrome);
3968         }
3969 }
3970
3971 static void
3972 ena_lock_init(void *arg)
3973 {
3974         ENA_LOCK_INIT();
3975 }
3976 SYSINIT(ena_lock_init, SI_SUB_LOCK, SI_ORDER_FIRST, ena_lock_init, NULL);
3977
3978 static void
3979 ena_lock_uninit(void *arg)
3980 {
3981         ENA_LOCK_DESTROY();
3982 }
3983 SYSUNINIT(ena_lock_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, ena_lock_uninit, NULL);
3984
3985 /**
3986  * This handler is called for an unknown event group or for unimplemented handlers.
3987  **/
3988 static void
3989 unimplemented_aenq_handler(void *adapter_data,
3990     struct ena_admin_aenq_entry *aenq_e)
3991 {
3992         struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3993
3994         ena_log(adapter->pdev, ERR,
3995             "Unknown event was received or event with unimplemented handler\n");
3996 }
3997
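     /*
      * Dispatch table for AENQ events: entries are indexed by AENQ group id,
      * and any group without a registered handler falls through to
      * unimplemented_aenq_handler().
      */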
3998 static struct ena_aenq_handlers aenq_handlers = {
3999     .handlers = {
4000             [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
4001             [ENA_ADMIN_NOTIFICATION] = ena_notification,
4002             [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
4003     },
4004     .unimplemented_handler = unimplemented_aenq_handler
4005 };
4006
4007 /*********************************************************************
4008  *  FreeBSD Device Interface Entry Points
4009  *********************************************************************/
4010
4011 static device_method_t ena_methods[] = { /* Device interface */
4012         DEVMETHOD(device_probe, ena_probe),
4013         DEVMETHOD(device_attach, ena_attach),
4014         DEVMETHOD(device_detach, ena_detach), DEVMETHOD_END
4015 };
4016
4017 static driver_t ena_driver = {
4018         "ena",
4019         ena_methods,
4020         sizeof(struct ena_adapter),
4021 };
4022
4023 DRIVER_MODULE(ena, pci, ena_driver, 0, 0);
4024 MODULE_PNP_INFO("U16:vendor;U16:device", pci, ena, ena_vendor_info_array,
4025     nitems(ena_vendor_info_array) - 1);
4026 MODULE_DEPEND(ena, pci, 1, 1, 1);
4027 MODULE_DEPEND(ena, ether, 1, 1, 1);
4028 #ifdef DEV_NETMAP
4029 MODULE_DEPEND(ena, netmap, 1, 1, 1);
4030 #endif /* DEV_NETMAP */
4031
4032 /*********************************************************************/