/*-
 * BSD LICENSE
 *
 * Copyright (c) 2015-2019 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "ena.h"
#include "ena_datapath.h"
#ifdef DEV_NETMAP
#include "ena_netmap.h"
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Static function prototypes
 *********************************************************************/

static int	ena_tx_cleanup(struct ena_ring *);
static int	ena_rx_cleanup(struct ena_ring *);
static inline int validate_tx_req_id(struct ena_ring *, uint16_t);
static void	ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
    struct mbuf *);
static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
    struct ena_com_rx_ctx *, uint16_t *);
static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
    struct mbuf *);
static void	ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *);
static int	ena_check_and_collapse_mbuf(struct ena_ring *tx_ring,
    struct mbuf **mbuf);
static int	ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
static void	ena_start_xmit(struct ena_ring *);

/*********************************************************************
 *  Global functions
 *********************************************************************/

void
ena_cleanup(void *arg, int pending)
{
	struct ena_que *que = arg;
	struct ena_adapter *adapter = que->adapter;
	if_t ifp = adapter->ifp;
	struct ena_ring *tx_ring;
	struct ena_ring *rx_ring;
	struct ena_com_io_cq* io_cq;
	struct ena_eth_io_intr_reg intr_reg;
	int qid, ena_qid;
	int txc, rxc, i;

	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
		return;

	ena_trace(ENA_DBG, "MSI-X TX/RX routine\n");

	tx_ring = que->tx_ring;
	rx_ring = que->rx_ring;
	qid = que->id;
	ena_qid = ENA_IO_TXQ_IDX(qid);
	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];

	tx_ring->first_interrupt = true;
	rx_ring->first_interrupt = true;
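
	/*
	 * Alternate between RX and TX cleanup.  Each pass is bounded by
	 * RX_BUDGET/TX_BUDGET packets; once both passes complete under
	 * budget the queues are drained, so stop early instead of spinning
	 * for all CLEAN_BUDGET iterations.
	 */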
	for (i = 0; i < CLEAN_BUDGET; ++i) {
		rxc = ena_rx_cleanup(rx_ring);
		txc = ena_tx_cleanup(tx_ring);

		if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
			return;

		if ((txc != TX_BUDGET) && (rxc != RX_BUDGET))
			break;
	}

	/* Signal that work is done and unmask interrupt */
	ena_com_update_intr_reg(&intr_reg,
	    RX_IRQ_INTERVAL,
	    TX_IRQ_INTERVAL,
	    true);
	ena_com_unmask_intr(io_cq, &intr_reg);
}

void
ena_deferred_mq_start(void *arg, int pending)
{
	struct ena_ring *tx_ring = (struct ena_ring *)arg;
	struct ifnet *ifp = tx_ring->adapter->ifp;

	while (!drbr_empty(ifp, tx_ring->br) &&
	    tx_ring->running &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
		ENA_RING_MTX_LOCK(tx_ring);
		ena_start_xmit(tx_ring);
		ENA_RING_MTX_UNLOCK(tx_ring);
	}
}

int
ena_mq_start(if_t ifp, struct mbuf *m)
{
	struct ena_adapter *adapter = ifp->if_softc;
	struct ena_ring *tx_ring;
	int ret, is_drbr_empty;
	uint32_t i;

	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
		return (ENODEV);

	/* Which queue to use */
	/*
	 * If everything is set up correctly, it should be the
	 * same bucket as the one the current CPU is assigned to.
	 * This should improve performance.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
		i = m->m_pkthdr.flowid % adapter->num_queues;
	} else {
		i = curcpu % adapter->num_queues;
	}
	tx_ring = &adapter->tx_ring[i];
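	/*
	 * Note: both branches reduce the index modulo num_queues.  When an
	 * RSS hash is present, all packets of a flow therefore map to the
	 * same TX ring, which preserves their ordering within that ring's
	 * buf_ring.
	 */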

	/* Check if drbr is empty before putting packet */
	is_drbr_empty = drbr_empty(ifp, tx_ring->br);
	ret = drbr_enqueue(ifp, tx_ring->br, m);
	if (unlikely(ret != 0)) {
		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
		return (ret);
	}

	if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
		ena_start_xmit(tx_ring);
		ENA_RING_MTX_UNLOCK(tx_ring);
	} else {
		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
	}

	return (0);
}

void
ena_qflush(if_t ifp)
{
	struct ena_adapter *adapter = ifp->if_softc;
	struct ena_ring *tx_ring = adapter->tx_ring;
	int i;

	for(i = 0; i < adapter->num_queues; ++i, ++tx_ring)
		if (!drbr_empty(ifp, tx_ring->br)) {
			ENA_RING_MTX_LOCK(tx_ring);
			drbr_flush(ifp, tx_ring->br);
			ENA_RING_MTX_UNLOCK(tx_ring);
		}

	if_qflush(ifp);
}

/*********************************************************************
 *  Static functions
 *********************************************************************/

static inline int
validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id)
{
	struct ena_adapter *adapter = tx_ring->adapter;
	struct ena_tx_buffer *tx_info = NULL;

	if (likely(req_id < tx_ring->ring_size)) {
		tx_info = &tx_ring->tx_buffer_info[req_id];
		if (tx_info->mbuf != NULL)
			return (0);
		device_printf(adapter->pdev,
		    "tx_info doesn't have valid mbuf\n");
	}

	device_printf(adapter->pdev, "Invalid req_id: %hu\n", req_id);
	counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);

	/* Trigger device reset */
	adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
	ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);

	return (EFAULT);
}

/**
 * ena_tx_cleanup - clear sent packets and corresponding descriptors
 * @tx_ring: ring for which we want to clean packets
 *
 * Once packets are sent, we ask the device in a loop for descriptors that are
 * no longer used. We find the related mbuf chain in a map (index in an array)
 * and free it, then update the ring state.
 * This is performed in an "endless" loop, updating ring pointers every
 * TX_COMMIT. The first check of free descriptors is performed before the
 * actual loop, then repeated at the loop end.
 **/
static int
ena_tx_cleanup(struct ena_ring *tx_ring)
{
	struct ena_adapter *adapter;
	struct ena_com_io_cq* io_cq;
	uint16_t next_to_clean;
	uint16_t req_id;
	uint16_t ena_qid;
	unsigned int total_done = 0;
	int rc;
	int commit = TX_COMMIT;
	int budget = TX_BUDGET;
	int work_done;
	bool above_thresh;

	adapter = tx_ring->que->adapter;
	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
	next_to_clean = tx_ring->next_to_clean;
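	/*
	 * next_to_clean is a local copy of the consumer index: completed
	 * req_ids are handed back to free_tx_ids below and the index is
	 * published to the ring every TX_COMMIT completions and once more
	 * after the loop ends.
	 */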

#ifdef DEV_NETMAP
	if (netmap_tx_irq(adapter->ifp, tx_ring->qid) != NM_IRQ_PASS)
		return (0);
#endif /* DEV_NETMAP */

	do {
		struct ena_tx_buffer *tx_info;
		struct mbuf *mbuf;

		rc = ena_com_tx_comp_req_id_get(io_cq, &req_id);
		if (unlikely(rc != 0))
			break;

		rc = validate_tx_req_id(tx_ring, req_id);
		if (unlikely(rc != 0))
			break;

		tx_info = &tx_ring->tx_buffer_info[req_id];

		mbuf = tx_info->mbuf;

		tx_info->mbuf = NULL;
		bintime_clear(&tx_info->timestamp);

		bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
		    BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(adapter->tx_buf_tag,
		    tx_info->dmamap);

		ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d mbuf %p completed\n",
		    tx_ring->qid, mbuf);

		m_freem(mbuf);

		total_done += tx_info->tx_descs;

		tx_ring->free_tx_ids[next_to_clean] = req_id;
		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
		    tx_ring->ring_size);

		if (unlikely(--commit == 0)) {
			commit = TX_COMMIT;
			/* update ring state every TX_COMMIT descriptor */
			tx_ring->next_to_clean = next_to_clean;
			ena_com_comp_ack(
			    &adapter->ena_dev->io_sq_queues[ena_qid],
			    total_done);
			ena_com_update_dev_comp_head(io_cq);
			total_done = 0;
		}
	} while (likely(--budget));

	work_done = TX_BUDGET - budget;

	ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d done. total pkts: %d\n",
	    tx_ring->qid, work_done);

	/* If there is still something to commit, update the ring state */
	if (likely(commit != TX_COMMIT)) {
		tx_ring->next_to_clean = next_to_clean;
		ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid],
		    total_done);
		ena_com_update_dev_comp_head(io_cq);
	}

	/*
	 * Need to make the ring's circular update visible to
	 * ena_xmit_mbuf() before checking for tx_ring->running.
	 */
	mb();
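
	/*
	 * Wake the queue only when enough descriptors were freed.  The check
	 * is repeated under the ring lock so that a concurrent
	 * ena_xmit_mbuf() stopping the queue cannot race with the wakeup.
	 */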
	above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
	    ENA_TX_RESUME_THRESH);
	if (unlikely(!tx_ring->running && above_thresh)) {
		ENA_RING_MTX_LOCK(tx_ring);
		above_thresh =
		    ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
		    ENA_TX_RESUME_THRESH);
		if (!tx_ring->running && above_thresh) {
			tx_ring->running = true;
			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
			taskqueue_enqueue(tx_ring->enqueue_tq,
			    &tx_ring->enqueue_task);
		}
		ENA_RING_MTX_UNLOCK(tx_ring);
	}

	return (work_done);
}

static void
ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
    struct mbuf *mbuf)
{
	struct ena_adapter *adapter = rx_ring->adapter;

	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
		mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
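
		/*
		 * The hash computed by the device becomes the flowid; the
		 * M_HASHTYPE_* value set below tells the stack which header
		 * fields were hashed, so it can reuse the flowid for its own
		 * flow distribution.
		 */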
		if (ena_rx_ctx->frag &&
		    (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) {
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
			return;
		}

		switch (ena_rx_ctx->l3_proto) {
		case ENA_ETH_IO_L3_PROTO_IPV4:
			switch (ena_rx_ctx->l4_proto) {
			case ENA_ETH_IO_L4_PROTO_TCP:
				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
				break;
			case ENA_ETH_IO_L4_PROTO_UDP:
				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
				break;
			default:
				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
			}
			break;
		case ENA_ETH_IO_L3_PROTO_IPV6:
			switch (ena_rx_ctx->l4_proto) {
			case ENA_ETH_IO_L4_PROTO_TCP:
				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
				break;
			case ENA_ETH_IO_L4_PROTO_UDP:
				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
				break;
			default:
				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
			}
			break;
		case ENA_ETH_IO_L3_PROTO_UNKNOWN:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
		}
	} else {
		mbuf->m_pkthdr.flowid = rx_ring->qid;
		M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
	}
}

/**
 * ena_rx_mbuf - assemble mbuf from descriptors
 * @rx_ring: ring for which we want to clean packets
 * @ena_bufs: buffer info
 * @ena_rx_ctx: metadata for this packet(s)
 * @next_to_clean: ring pointer, will be updated only upon success
 *
 **/
static struct mbuf*
ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
    struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean)
{
	struct mbuf *mbuf;
	struct ena_rx_buffer *rx_info;
	struct ena_adapter *adapter;
	unsigned int descs = ena_rx_ctx->descs;
	int rc;
	uint16_t ntc, len, req_id, buf = 0;

	ntc = *next_to_clean;
	adapter = rx_ring->adapter;

	len = ena_bufs[buf].len;
	req_id = ena_bufs[buf].req_id;
	rc = validate_rx_req_id(rx_ring, req_id);
	if (unlikely(rc != 0))
		return (NULL);

	rx_info = &rx_ring->rx_buffer_info[req_id];
	if (unlikely(rx_info->mbuf == NULL)) {
		device_printf(adapter->pdev, "NULL mbuf in rx_info");
		return (NULL);
	}

	ena_trace(ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx\n",
	    rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);

	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
	    BUS_DMASYNC_POSTREAD);
	mbuf = rx_info->mbuf;
	mbuf->m_flags |= M_PKTHDR;
	mbuf->m_pkthdr.len = len;
	mbuf->m_len = len;
	mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp;

	/* Fill mbuf with hash key and its interpretation for optimization */
	ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);

	ena_trace(ENA_DBG | ENA_RXPTH, "rx mbuf 0x%p, flags=0x%x, len: %d\n",
	    mbuf, mbuf->m_flags, mbuf->m_pkthdr.len);

	/* DMA address is not needed anymore, unmap it */
	bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);

	rx_info->mbuf = NULL;
	rx_ring->free_rx_ids[ntc] = req_id;
	ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
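
	/*
	 * The consumed req_id goes back to free_rx_ids so that the refill
	 * path (ena_refill_rx_bufs()) can post a fresh buffer into this
	 * slot; ntc advances with wrap-around at ring_size.
	 */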

	/*
	 * While we have more than one descriptor for the received packet,
	 * append the other mbufs to the main one.
	 */
	while (--descs) {
		++buf;
		len = ena_bufs[buf].len;
		req_id = ena_bufs[buf].req_id;
		rc = validate_rx_req_id(rx_ring, req_id);
		if (unlikely(rc != 0)) {
			/*
			 * If the req_id is invalid, then the device will be
			 * reset. In that case we must free all mbufs that
			 * were already gathered.
			 */
			m_freem(mbuf);
			return (NULL);
		}
		rx_info = &rx_ring->rx_buffer_info[req_id];

		if (unlikely(rx_info->mbuf == NULL)) {
			device_printf(adapter->pdev, "NULL mbuf in rx_info");
			/*
			 * If one of the required mbufs was not allocated yet,
			 * we can break here.
			 * All earlier used descriptors will be reallocated
			 * later and unused mbufs can be reused.
			 * The next_to_clean pointer will not be updated in
			 * case of an error, so the caller should advance it
			 * manually in the error handling routine to keep it
			 * up to date with the handled descriptors.
			 */
			m_freem(mbuf);
			return (NULL);
		}

		bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
		    BUS_DMASYNC_POSTREAD);
		if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) {
			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
			ena_trace(ENA_WARNING, "Failed to append Rx mbuf %p\n",
			    mbuf);
		}

		ena_trace(ENA_DBG | ENA_RXPTH,
		    "rx mbuf updated. len %d\n", mbuf->m_pkthdr.len);

		/* Free already appended mbuf, it won't be useful anymore */
		bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
		m_freem(rx_info->mbuf);
		rx_info->mbuf = NULL;

		rx_ring->free_rx_ids[ntc] = req_id;
		ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
	}

	*next_to_clean = ntc;

	return (mbuf);
}

/**
 * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum
 **/
static inline void
ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
    struct mbuf *mbuf)
{
	/* if IP and error */
	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
	    ena_rx_ctx->l3_csum_err)) {
		/* ipv4 checksum error */
		mbuf->m_pkthdr.csum_flags = 0;
		counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
		ena_trace(ENA_DBG, "RX IPv4 header checksum error\n");
		return;
	}

	/* if TCP/UDP */
	if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
	    (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
		if (ena_rx_ctx->l4_csum_err) {
			/* TCP/UDP checksum error */
			mbuf->m_pkthdr.csum_flags = 0;
			counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
			ena_trace(ENA_DBG, "RX L4 checksum error\n");
		} else {
			mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
			mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
		}
	}
}

/**
 * ena_rx_cleanup - handle rx irq
 * @arg: ring for which irq is being handled
 **/
static int
ena_rx_cleanup(struct ena_ring *rx_ring)
{
	struct ena_adapter *adapter;
	struct mbuf *mbuf;
	struct ena_com_rx_ctx ena_rx_ctx;
	struct ena_com_io_cq* io_cq;
	struct ena_com_io_sq* io_sq;
	if_t ifp;
	uint16_t ena_qid;
	uint16_t next_to_clean;
	uint32_t refill_required;
	uint32_t refill_threshold;
	uint32_t do_if_input = 0;
	unsigned int qid;
	int rc, i;
	int budget = RX_BUDGET;
#ifdef DEV_NETMAP
	int done;
#endif /* DEV_NETMAP */

	adapter = rx_ring->que->adapter;
	ifp = adapter->ifp;
	qid = rx_ring->que->id;
	ena_qid = ENA_IO_RXQ_IDX(qid);
	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
	next_to_clean = rx_ring->next_to_clean;

#ifdef DEV_NETMAP
	if (netmap_rx_irq(adapter->ifp, rx_ring->qid, &done) != NM_IRQ_PASS)
		return (0);
#endif /* DEV_NETMAP */

	ena_trace(ENA_DBG, "rx: qid %d\n", qid);

	do {
		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
		ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
		ena_rx_ctx.descs = 0;
		bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
		    io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD);
		rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);

		if (unlikely(rc != 0))
			goto error;

		if (unlikely(ena_rx_ctx.descs == 0))
			break;

		ena_trace(ENA_DBG | ENA_RXPTH, "rx: q %d got packet from ena. "
		    "descs #: %d l3 proto %d l4 proto %d hash: %x\n",
		    rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
		    ena_rx_ctx.l4_proto, ena_rx_ctx.hash);

		/* Receive mbuf from the ring */
		mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs,
		    &ena_rx_ctx, &next_to_clean);
		bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
		    io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD);
		/* Exit if we failed to retrieve a buffer */
		if (unlikely(mbuf == NULL)) {
			for (i = 0; i < ena_rx_ctx.descs; ++i) {
				rx_ring->free_rx_ids[next_to_clean] =
				    rx_ring->ena_bufs[i].req_id;
				next_to_clean =
				    ENA_RX_RING_IDX_NEXT(next_to_clean,
				    rx_ring->ring_size);
			}
			break;
		}

		if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) ||
		    ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) {
			ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
		}

		counter_enter();
		counter_u64_add_protected(rx_ring->rx_stats.bytes,
		    mbuf->m_pkthdr.len);
		counter_u64_add_protected(adapter->hw_stats.rx_bytes,
		    mbuf->m_pkthdr.len);
		counter_exit();

		/*
		 * LRO is only for IP/TCP packets, and the TCP checksum of the
		 * packet must have been computed by the hardware.
		 */
		do_if_input = 1;
		if (((ifp->if_capenable & IFCAP_LRO) != 0) &&
		    ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
		    (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) {
			/*
			 * Send to the stack if:
			 *  - LRO not enabled, or
			 *  - no LRO resources, or
			 *  - lro enqueue fails
			 */
			if ((rx_ring->lro.lro_cnt != 0) &&
			    (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0))
				do_if_input = 0;
		}
		if (do_if_input != 0) {
			ena_trace(ENA_DBG | ENA_RXPTH,
			    "calling if_input() with mbuf %p\n", mbuf);
			(*ifp->if_input)(ifp, mbuf);
		}

		counter_enter();
		counter_u64_add_protected(rx_ring->rx_stats.cnt, 1);
		counter_u64_add_protected(adapter->hw_stats.rx_packets, 1);
		counter_exit();
	} while (--budget);

	rx_ring->next_to_clean = next_to_clean;

	refill_required = ena_com_free_desc(io_sq);
	refill_threshold = min_t(int,
	    rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
	    ENA_RX_REFILL_THRESH_PACKET);
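
	/*
	 * Refill in batches: new buffers are posted only once the number of
	 * consumed descriptors exceeds min(ring_size /
	 * ENA_RX_REFILL_THRESH_DIVIDER, ENA_RX_REFILL_THRESH_PACKET), which
	 * keeps the per-packet refill overhead low.
	 */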
	if (refill_required > refill_threshold) {
		ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
		ena_refill_rx_bufs(rx_ring, refill_required);
	}

	tcp_lro_flush_all(&rx_ring->lro);

	return (RX_BUDGET - budget);

error:
	counter_u64_add(rx_ring->rx_stats.bad_desc_num, 1);

	/* Too many desc from the device. Trigger reset */
	if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
		adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
	}

	return (0);
}

static void
ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf)
{
	struct ena_com_tx_meta *ena_meta;
	struct ether_vlan_header *eh;
	struct mbuf *mbuf_next;
	struct ip *ip;
	struct tcphdr *th;
	int etype, ehdrlen, iphlen, offset;
	uint32_t mss;
	bool offload;

	offload = false;
	ena_meta = &ena_tx_ctx->ena_meta;
	mss = mbuf->m_pkthdr.tso_segsz;

	if (mss != 0)
		offload = true;

	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0)
		offload = true;

	if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
		offload = true;

	if (!offload) {
		ena_tx_ctx->meta_valid = 0;
		return;
	}

	/* Determine where frame payload starts. */
	eh = mtod(mbuf, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	mbuf_next = m_getptr(mbuf, ehdrlen, &offset);
	ip = (struct ip *)(mtodo(mbuf_next, offset));
	iphlen = ip->ip_hl << 2;
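
	/*
	 * m_getptr() walks the mbuf chain to the mbuf holding the byte at
	 * the given offset and mtodo() turns that into a pointer, so the
	 * TCP header below can be read even when it does not sit in the
	 * first mbuf of the chain.
	 */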
	mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset);
	th = (struct tcphdr *)(mtodo(mbuf_next, offset));

	if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) {
		ena_tx_ctx->l3_csum_enable = 1;
	}
	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
		ena_tx_ctx->tso_enable = 1;
		ena_meta->l4_hdr_len = (th->th_off);
	}

	switch (etype) {
	case ETHERTYPE_IP:
		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
		if ((ip->ip_off & htons(IP_DF)) != 0)
			ena_tx_ctx->df = 1;
		break;
	case ETHERTYPE_IPV6:
		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
		break;
	default:
		break;
	}

	if (ip->ip_p == IPPROTO_TCP) {
		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
		if ((mbuf->m_pkthdr.csum_flags &
		    (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0)
			ena_tx_ctx->l4_csum_enable = 1;
		else
			ena_tx_ctx->l4_csum_enable = 0;
	} else if (ip->ip_p == IPPROTO_UDP) {
		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
		if ((mbuf->m_pkthdr.csum_flags &
		    (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0)
			ena_tx_ctx->l4_csum_enable = 1;
		else
			ena_tx_ctx->l4_csum_enable = 0;
	} else {
		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
		ena_tx_ctx->l4_csum_enable = 0;
	}

	ena_meta->mss = mss;
	ena_meta->l3_hdr_len = iphlen;
	ena_meta->l3_hdr_offset = ehdrlen;
	ena_tx_ctx->meta_valid = 1;
}

static int
ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
{
	struct ena_adapter *adapter;
	struct mbuf *collapsed_mbuf;
	int num_frags;

	adapter = tx_ring->adapter;
	num_frags = ena_mbuf_count(*mbuf);

	/* One segment must be reserved for configuration descriptor. */
	if (num_frags < adapter->max_tx_sgl_size)
		return (0);
	counter_u64_add(tx_ring->tx_stats.collapse, 1);
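
	/*
	 * m_collapse() defragments the chain so that it fits into at most
	 * max_tx_sgl_size - 1 mbufs (one SGL entry stays reserved, as noted
	 * above); with M_NOWAIT it fails instead of sleeping for memory.
	 */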
	collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT,
	    adapter->max_tx_sgl_size - 1);
	if (unlikely(collapsed_mbuf == NULL)) {
		counter_u64_add(tx_ring->tx_stats.collapse_err, 1);
		return (ENOMEM);
	}

	/* If the mbuf was collapsed successfully, the original mbuf is released. */
	*mbuf = collapsed_mbuf;

	return (0);
}

static int
ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info,
    struct mbuf *mbuf, void **push_hdr, u16 *header_len)
{
	struct ena_adapter *adapter = tx_ring->adapter;
	struct ena_com_buf *ena_buf;
	bus_dma_segment_t segs[ENA_BUS_DMA_SEGS];
	size_t iseg = 0;
	uint32_t mbuf_head_len, frag_len;
	uint16_t push_len = 0;
	uint16_t delta = 0;
	int rc, nsegs;

	mbuf_head_len = mbuf->m_len;
	tx_info->mbuf = mbuf;
	ena_buf = tx_info->bufs;

	/*
	 * For easier maintenance of the DMA map, map the whole mbuf even if
	 * the LLQ is used. The descriptors will be filled using the segments.
	 */
	rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->dmamap, mbuf,
	    segs, &nsegs, BUS_DMA_NOWAIT);
	if (unlikely((rc != 0) || (nsegs == 0))) {
		ena_trace(ENA_WARNING,
		    "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs);
		goto dma_error;
	}

	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
		/*
		 * When the device is in LLQ mode, the driver will copy
		 * the header into the device memory space.
		 * The ena_com layer assumes the header is in a linear
		 * memory space.
		 * This assumption might be wrong since part of the header
		 * can be in the fragmented buffers.
		 * First check if the header fits in the mbuf. If not, copy it
		 * to a separate buffer that will hold the linearized data.
		 */
		push_len = min_t(uint32_t, mbuf->m_pkthdr.len,
		    tx_ring->tx_max_header_size);
		*header_len = push_len;
		/* If header is in linear space, just point into mbuf's data. */
		if (likely(push_len <= mbuf_head_len)) {
			*push_hdr = mbuf->m_data;
		/*
		 * Otherwise, copy the whole header from (possibly multiple)
		 * mbufs into the intermediate buffer.
		 */
		} else {
			m_copydata(mbuf, 0, push_len,
			    tx_ring->push_buf_intermediate_buf);
			*push_hdr = tx_ring->push_buf_intermediate_buf;

			counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1);
			delta = push_len - mbuf_head_len;
		}

		ena_trace(ENA_DBG | ENA_TXPTH,
		    "mbuf: %p header_buf->vaddr: %p push_len: %d\n",
		    mbuf, *push_hdr, push_len);

		/*
		 * If the header was in linear memory space, map the rest of
		 * the data in the first mbuf of the chain for DMA.
		 */
		if (mbuf_head_len > push_len) {
			ena_buf->paddr = segs[iseg].ds_addr + push_len;
			ena_buf->len = segs[iseg].ds_len - push_len;
			ena_buf++;
			tx_info->num_of_bufs++;
		}
		/*
		 * Advance the seg index as either the 1st mbuf was mapped or
		 * is a part of push_hdr.
		 */
		iseg++;
	} else {
		*push_hdr = NULL;
		/*
		 * header_len is just a hint for the device. Because FreeBSD
		 * does not provide the packet header length and it is not
		 * guaranteed that all packet headers are in the 1st mbuf,
		 * setting header_len to 0 makes the device ignore this value
		 * and resolve the header on its own.
		 */
		*header_len = 0;
	}

	/*
	 * If the header is in non-linear space (delta > 0), then skip mbufs
	 * containing the header and map the last one containing both the
	 * header and the packet data.
	 * The first segment is already counted in.
	 * If LLQ is not supported, the loop will be skipped.
	 */
	while (delta > 0) {
		frag_len = segs[iseg].ds_len;

		/*
		 * If the whole segment contains header just move to the
		 * next one and reduce delta.
		 */
		if (unlikely(delta >= frag_len)) {
			delta -= frag_len;
		} else {
			/*
			 * Map the rest of the packet data that was contained
			 * in the mbuf.
			 */
			ena_buf->paddr = segs[iseg].ds_addr + delta;
			ena_buf->len = frag_len - delta;
			ena_buf++;
			tx_info->num_of_bufs++;

			delta = 0;
		}

		iseg++;
	}

	/* Map rest of the mbuf */
	while (iseg < nsegs) {
		ena_buf->paddr = segs[iseg].ds_addr;
		ena_buf->len = segs[iseg].ds_len;
		ena_buf++;
		iseg++;
		tx_info->num_of_bufs++;
	}

	return (0);

dma_error:
	counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
	tx_info->mbuf = NULL;
	return (rc);
}

static int
ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
{
	struct ena_adapter *adapter;
	struct ena_tx_buffer *tx_info;
	struct ena_com_tx_ctx ena_tx_ctx;
	struct ena_com_dev *ena_dev;
	struct ena_com_io_sq* io_sq;
	void *push_hdr;
	uint16_t next_to_use;
	uint16_t req_id;
	uint16_t ena_qid;
	uint16_t header_len;
	int rc;
	int nb_hw_desc;

	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
	adapter = tx_ring->que->adapter;
	ena_dev = adapter->ena_dev;
	io_sq = &ena_dev->io_sq_queues[ena_qid];

	rc = ena_check_and_collapse_mbuf(tx_ring, mbuf);
	if (unlikely(rc != 0)) {
		ena_trace(ENA_WARNING,
		    "Failed to collapse mbuf! err: %d\n", rc);
		return (rc);
	}

	ena_trace(ENA_DBG | ENA_TXPTH, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len);

	next_to_use = tx_ring->next_to_use;
	req_id = tx_ring->free_tx_ids[next_to_use];
	tx_info = &tx_ring->tx_buffer_info[req_id];
	tx_info->num_of_bufs = 0;

	rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len);
	if (unlikely(rc != 0)) {
		ena_trace(ENA_WARNING, "Failed to map TX mbuf\n");
		return (rc);
	}
	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
	ena_tx_ctx.ena_bufs = tx_info->bufs;
	ena_tx_ctx.push_header = push_hdr;
	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
	ena_tx_ctx.req_id = req_id;
	ena_tx_ctx.header_len = header_len;

	/* Set flags and meta data */
	ena_tx_csum(&ena_tx_ctx, *mbuf);
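
	/*
	 * Doorbells are batched: ena_start_xmit() normally rings once per
	 * burst, but ring early here if DB_THRESHOLD packets have already
	 * accumulated or ena_com reports that adding this packet would
	 * exceed the LLQ max burst size.
	 */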
	if (tx_ring->acum_pkts == DB_THRESHOLD ||
	    ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) {
		ena_trace(ENA_DBG | ENA_TXPTH,
		    "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
		    tx_ring->que->id);
		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
		counter_u64_add(tx_ring->tx_stats.doorbells, 1);
		tx_ring->acum_pkts = 0;
	}

	/* Prepare the packet's descriptors and send them to the device */
	rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
	if (unlikely(rc != 0)) {
		if (likely(rc == ENA_COM_NO_MEM)) {
			ena_trace(ENA_DBG | ENA_TXPTH,
			    "tx ring[%d] is out of space\n", tx_ring->que->id);
		} else {
			device_printf(adapter->pdev,
			    "failed to prepare tx bufs\n");
		}
		counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1);
		goto dma_error;
	}

	counter_enter();
	counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
	counter_u64_add_protected(tx_ring->tx_stats.bytes,
	    (*mbuf)->m_pkthdr.len);

	counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
	counter_u64_add_protected(adapter->hw_stats.tx_bytes,
	    (*mbuf)->m_pkthdr.len);
	counter_exit();

	tx_info->tx_descs = nb_hw_desc;
	getbinuptime(&tx_info->timestamp);
	tx_info->print_once = true;

	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
	    tx_ring->ring_size);

	/*
	 * Stop the queue when no more space is available. The packet can
	 * have up to sgl_size + 2 descriptors: one for the meta descriptor
	 * and one for the header (if the header is larger than
	 * tx_max_header_size).
	 */
	if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
	    adapter->max_tx_sgl_size + 2))) {
		ena_trace(ENA_DBG | ENA_TXPTH, "Stop queue %d\n",
		    tx_ring->que->id);

		tx_ring->running = false;
		counter_u64_add(tx_ring->tx_stats.queue_stop, 1);

		/*
		 * There is a rare condition where this function decides to
		 * stop the queue but meanwhile tx_cleanup() updates
		 * next_to_completion and terminates.
		 * The queue will remain stopped forever.
		 * To solve this issue, this function performs an mb(), checks
		 * the wakeup condition and wakes up the queue if needed.
		 */
		mb();

		if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
		    ENA_TX_RESUME_THRESH)) {
			tx_ring->running = true;
			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
		}
	}

	bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
	    BUS_DMASYNC_PREWRITE);

	return (0);

dma_error:
	tx_info->mbuf = NULL;
	bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);

	return (rc);
}

static void
ena_start_xmit(struct ena_ring *tx_ring)
{
	struct mbuf *mbuf;
	struct ena_adapter *adapter = tx_ring->adapter;
	struct ena_com_io_sq* io_sq;
	int ena_qid;
	int ret = 0;

	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)))
		return;

	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
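
	/*
	 * drbr_peek() looks at the head of the buf_ring without consuming
	 * it: on success the mbuf is removed with drbr_advance(), while
	 * drbr_putback() returns it to the head so it is retried once the
	 * ring has room again.
	 */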
	while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) {
		ena_trace(ENA_DBG | ENA_TXPTH, "\ndequeued mbuf %p with flags %#x and"
		    " header csum flags %#jx\n",
		    mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags);

		if (unlikely(!tx_ring->running)) {
			drbr_putback(adapter->ifp, tx_ring->br, mbuf);
			break;
		}

		if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) {
			if (ret == ENA_COM_NO_MEM) {
				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
			} else if (ret == ENA_COM_NO_SPACE) {
				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
			} else {
				m_freem(mbuf);
				drbr_advance(adapter->ifp, tx_ring->br);
			}

			break;
		}

		drbr_advance(adapter->ifp, tx_ring->br);

		if (unlikely((if_getdrvflags(adapter->ifp) &
		    IFF_DRV_RUNNING) == 0))
			return;

		tx_ring->acum_pkts++;

		BPF_MTAP(adapter->ifp, mbuf);
	}

	if (likely(tx_ring->acum_pkts != 0)) {
		wmb();
		/* Trigger the dma engine */
		ena_com_write_sq_doorbell(io_sq);
		counter_u64_add(tx_ring->tx_stats.doorbells, 1);
		tx_ring->acum_pkts = 0;
	}

	if (unlikely(!tx_ring->running))
		taskqueue_enqueue(tx_ring->que->cleanup_tq,
		    &tx_ring->que->cleanup_task);
}