2 * Copyright (C) 2016-2018 Vincenzo Maffione
3 * Copyright (C) 2015 Stefano Garzarella
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #if defined(__FreeBSD__)
34 #include <sys/cdefs.h>
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/types.h>
38 #include <sys/selinfo.h>
39 #include <sys/socket.h>
41 #include <net/if_var.h>
42 #include <machine/bus.h>
44 #define usleep_range(_1, _2) \
45 pause_sbt("sync-kloop-sleep", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE)
49 #include <linux/file.h>
50 #include <linux/eventfd.h>
53 #include <net/netmap.h>
54 #include <dev/netmap/netmap_kern.h>
55 #include <net/netmap_virt.h>
56 #include <dev/netmap/netmap_mem2.h>
58 /* Support for eventfd-based notifications. */
60 #define SYNC_KLOOP_POLL
63 /* Write kring pointers (hwcur, hwtail) to the CSB.
64 * This routine is coupled with nm_sync_kloop_appl_read(). */
66 sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur,
69 /* Issue a first store-store barrier to make sure writes to the
70 * netmap ring do not overcome updates on ktoa->hwcur and ktoa->hwtail. */
74 * The same scheme used in nm_sync_kloop_appl_write() applies here.
75 * We allow the application to read a value of hwcur more recent than the value
76 * of hwtail, since this would anyway result in a consistent view of the
77 * ring state (and hwcur can never wrap around hwtail, since hwcur must be
80 * The following memory barrier scheme is used to make this happen:
84 * STORE(hwcur) LOAD(hwtail)
85 * wmb() <-------------> rmb()
86 * STORE(hwtail) LOAD(hwcur)
88 CSB_WRITE(ptr, hwcur, hwcur);
90 CSB_WRITE(ptr, hwtail, hwtail);
93 /* Read kring pointers (head, cur, sync_flags) from the CSB.
94 * This routine is coupled with nm_sync_kloop_appl_write(). */
96 sync_kloop_kernel_read(struct nm_csb_atok __user *ptr,
97 struct netmap_ring *shadow_ring,
101 * We place a memory barrier to make sure that the update of head never
102 * overtakes the update of cur.
103 * (see explanation in sync_kloop_kernel_write).
105 CSB_READ(ptr, head, shadow_ring->head);
107 CSB_READ(ptr, cur, shadow_ring->cur);
108 CSB_READ(ptr, sync_flags, shadow_ring->flags);
110 /* Make sure that loads from atok->head and atok->cur are not delayed
111 * after the loads from the netmap ring. */
115 /* Enable or disable application --> kernel kicks. */
117 csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val)
119 CSB_WRITE(csb_ktoa, kern_need_kick, val);
122 #ifdef SYNC_KLOOP_POLL
123 /* Are application interrupts enabled or disabled? */
124 static inline uint32_t
125 csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok)
129 CSB_READ(csb_atok, appl_need_kick, v);
133 #endif /* SYNC_KLOOP_POLL */
136 sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring)
138 nm_prinf("%s, kring %s, hwcur %d, rhead %d, "
139 "rcur %d, rtail %d, hwtail %d",
140 title, kring->name, kring->nr_hwcur, kring->rhead,
141 kring->rcur, kring->rtail, kring->nr_hwtail);
144 /* Arguments for netmap_sync_kloop_tx_ring() and
145 * netmap_sync_kloop_rx_ring().
147 struct sync_kloop_ring_args {
148 struct netmap_kring *kring;
149 struct nm_csb_atok *csb_atok;
150 struct nm_csb_ktoa *csb_ktoa;
151 #ifdef SYNC_KLOOP_POLL
152 struct eventfd_ctx *irq_ctx;
153 #endif /* SYNC_KLOOP_POLL */
154 /* Are we busy waiting rather than using a schedule() loop ? */
156 /* Are we processing in the context of VM exit ? */
161 netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a)
163 struct netmap_kring *kring = a->kring;
164 struct nm_csb_atok *csb_atok = a->csb_atok;
165 struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
166 struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
167 #ifdef SYNC_KLOOP_POLL
168 bool more_txspace = false;
169 #endif /* SYNC_KLOOP_POLL */
173 if (unlikely(nm_kr_tryget(kring, 1, NULL))) {
177 num_slots = kring->nkr_num_slots;
179 /* Disable application --> kernel notifications. */
181 csb_ktoa_kick_enable(csb_ktoa, 0);
183 /* Copy the application kring pointers from the CSB */
184 sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
187 batch = shadow_ring.head - kring->nr_hwcur;
191 #ifdef PTN_TX_BATCH_LIM
192 if (batch > PTN_TX_BATCH_LIM(num_slots)) {
193 /* If application moves ahead too fast, let's cut the move so
194 * that we don't exceed our batch limit. */
195 uint32_t head_lim = kring->nr_hwcur + PTN_TX_BATCH_LIM(num_slots);
197 if (head_lim >= num_slots)
198 head_lim -= num_slots;
199 nm_prdis(1, "batch: %d head: %d head_lim: %d", batch, shadow_ring.head,
201 shadow_ring.head = head_lim;
202 batch = PTN_TX_BATCH_LIM(num_slots);
204 #endif /* PTN_TX_BATCH_LIM */
206 if (nm_kr_txspace(kring) <= (num_slots >> 1)) {
207 shadow_ring.flags |= NAF_FORCE_RECLAIM;
210 /* Netmap prologue */
211 shadow_ring.tail = kring->rtail;
212 if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) {
213 /* Reinit ring and enable notifications. */
214 netmap_ring_reinit(kring);
216 csb_ktoa_kick_enable(csb_ktoa, 1);
221 if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
222 sync_kloop_kring_dump("pre txsync", kring);
225 if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
227 /* Re-enable notifications. */
228 csb_ktoa_kick_enable(csb_ktoa, 1);
230 nm_prerr("txsync() failed");
236 * Copy kernel hwcur and hwtail into the CSB for the application sync(), and
237 * do the nm_sync_finalize.
239 sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur,
241 if (kring->rtail != kring->nr_hwtail) {
242 /* Some more room available in the parent adapter. */
243 kring->rtail = kring->nr_hwtail;
244 #ifdef SYNC_KLOOP_POLL
246 #endif /* SYNC_KLOOP_POLL */
249 if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
250 sync_kloop_kring_dump("post txsync", kring);
253 /* Interrupt the application if needed. */
254 #ifdef SYNC_KLOOP_POLL
255 if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
256 /* We could disable kernel --> application kicks here,
257 * to avoid spurious interrupts. */
258 eventfd_signal(a->irq_ctx, 1);
259 more_txspace = false;
261 #endif /* SYNC_KLOOP_POLL */
263 /* Read CSB to see if there is more work to do. */
264 sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
265 if (shadow_ring.head == kring->rhead) {
270 * No more packets to transmit. We enable notifications and
271 * go to sleep, waiting for a kick from the application when new
272 * slots are ready for transmission.
274 /* Re-enable notifications. */
275 csb_ktoa_kick_enable(csb_ktoa, 1);
276 /* Double check, with store-load memory barrier. */
278 sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
279 if (shadow_ring.head != kring->rhead) {
280 /* We won the race condition, there are more packets to
281 * transmit. Disable notifications and do another cycle */
282 csb_ktoa_kick_enable(csb_ktoa, 0);
288 if (nm_kr_txempty(kring)) {
289 /* No more available TX slots. We stop and wait for a notification
290 * from the backend (netmap_tx_irq). */
291 nm_prdis(1, "TX ring");
298 #ifdef SYNC_KLOOP_POLL
299 if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
300 eventfd_signal(a->irq_ctx, 1);
302 #endif /* SYNC_KLOOP_POLL */
305 /* Maximum number of RX cycles without receiving any packets */
306 #define SYNC_LOOP_RX_DRY_CYCLES_MAX 2
309 sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head)
311 return (NM_ACCESS_ONCE(kring->nr_hwtail) == nm_prev(g_head,
312 kring->nkr_num_slots - 1));
316 netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a)
319 struct netmap_kring *kring = a->kring;
320 struct nm_csb_atok *csb_atok = a->csb_atok;
321 struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
322 struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
324 #ifdef SYNC_KLOOP_POLL
325 bool some_recvd = false;
326 #endif /* SYNC_KLOOP_POLL */
329 if (unlikely(nm_kr_tryget(kring, 1, NULL))) {
333 num_slots = kring->nkr_num_slots;
335 /* Get RX csb_atok and csb_ktoa pointers from the CSB. */
336 num_slots = kring->nkr_num_slots;
338 /* Disable notifications. */
340 csb_ktoa_kick_enable(csb_ktoa, 0);
342 /* Copy the application kring pointers from the CSB */
343 sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
348 /* Netmap prologue */
349 shadow_ring.tail = kring->rtail;
350 if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) {
351 /* Reinit ring and enable notifications. */
352 netmap_ring_reinit(kring);
354 csb_ktoa_kick_enable(csb_ktoa, 1);
359 if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
360 sync_kloop_kring_dump("pre rxsync", kring);
363 if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
365 /* Re-enable notifications. */
366 csb_ktoa_kick_enable(csb_ktoa, 1);
368 nm_prerr("rxsync() failed");
374 * Copy kernel hwcur and hwtail into the CSB for the application sync()
376 hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
377 sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail);
378 if (kring->rtail != hwtail) {
379 kring->rtail = hwtail;
380 #ifdef SYNC_KLOOP_POLL
382 #endif /* SYNC_KLOOP_POLL */
388 if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
389 sync_kloop_kring_dump("post rxsync", kring);
392 #ifdef SYNC_KLOOP_POLL
393 /* Interrupt the application if needed. */
394 if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
395 /* We could disable kernel --> application kicks here,
396 * to avoid spurious interrupts. */
397 eventfd_signal(a->irq_ctx, 1);
400 #endif /* SYNC_KLOOP_POLL */
402 /* Read CSB to see if there is more work to do. */
403 sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
404 if (sync_kloop_norxslots(kring, shadow_ring.head)) {
409 * No more slots available for reception. We enable notification and
410 * go to sleep, waiting for a kick from the application when new receive
411 * slots are available.
413 /* Re-enable notifications. */
414 csb_ktoa_kick_enable(csb_ktoa, 1);
415 /* Double check, with store-load memory barrier. */
417 sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
418 if (!sync_kloop_norxslots(kring, shadow_ring.head)) {
419 /* We won the race condition, more slots are available. Disable
420 * notifications and do another cycle. */
421 csb_ktoa_kick_enable(csb_ktoa, 0);
427 hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
428 if (unlikely(hwtail == kring->rhead ||
429 dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) {
430 /* No more packets to be read from the backend. We stop and
431 * wait for a notification from the backend (netmap_rx_irq). */
432 nm_prdis(1, "nr_hwtail: %d rhead: %d dry_cycles: %d",
433 hwtail, kring->rhead, dry_cycles);
440 #ifdef SYNC_KLOOP_POLL
441 /* Interrupt the application if needed. */
442 if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
443 eventfd_signal(a->irq_ctx, 1);
445 #endif /* SYNC_KLOOP_POLL */
448 #ifdef SYNC_KLOOP_POLL
449 struct sync_kloop_poll_ctx;
450 struct sync_kloop_poll_entry {
451 /* Support for receiving notifications from
452 * a netmap ring or from the application. */
455 wait_queue_head_t *wqh;
457 /* Support for sending notifications to the application. */
458 struct eventfd_ctx *irq_ctx;
459 struct file *irq_filp;
461 /* Arguments for the ring processing function. Useful
462 * in case of custom wake-up function. */
463 struct sync_kloop_ring_args *args;
464 struct sync_kloop_poll_ctx *parent;
468 struct sync_kloop_poll_ctx {
469 poll_table wait_table;
470 unsigned int next_entry;
471 int (*next_wake_fun)(wait_queue_t *, unsigned, int, void *);
472 unsigned int num_entries;
473 unsigned int num_tx_rings;
474 unsigned int num_rings;
475 /* First num_tx_rings entries are for the TX kicks.
476 * Then the RX kicks entries follow. The last two
477 * entries are for TX irq, and RX irq. */
478 struct sync_kloop_poll_entry entries[0];
482 sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh,
485 struct sync_kloop_poll_ctx *poll_ctx =
486 container_of(pt, struct sync_kloop_poll_ctx, wait_table);
487 struct sync_kloop_poll_entry *entry = poll_ctx->entries +
488 poll_ctx->next_entry;
490 BUG_ON(poll_ctx->next_entry >= poll_ctx->num_entries);
493 /* Use the default wake up function. */
494 if (poll_ctx->next_wake_fun == NULL) {
495 init_waitqueue_entry(&entry->wait, current);
497 init_waitqueue_func_entry(&entry->wait,
498 poll_ctx->next_wake_fun);
500 add_wait_queue(wqh, &entry->wait);
504 sync_kloop_tx_kick_wake_fun(wait_queue_t *wait, unsigned mode,
505 int wake_flags, void *key)
507 struct sync_kloop_poll_entry *entry =
508 container_of(wait, struct sync_kloop_poll_entry, wait);
510 netmap_sync_kloop_tx_ring(entry->args);
516 sync_kloop_tx_irq_wake_fun(wait_queue_t *wait, unsigned mode,
517 int wake_flags, void *key)
519 struct sync_kloop_poll_entry *entry =
520 container_of(wait, struct sync_kloop_poll_entry, wait);
521 struct sync_kloop_poll_ctx *poll_ctx = entry->parent;
524 for (i = 0; i < poll_ctx->num_tx_rings; i++) {
525 struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx;
528 eventfd_signal(irq_ctx, 1);
536 sync_kloop_rx_kick_wake_fun(wait_queue_t *wait, unsigned mode,
537 int wake_flags, void *key)
539 struct sync_kloop_poll_entry *entry =
540 container_of(wait, struct sync_kloop_poll_entry, wait);
542 netmap_sync_kloop_rx_ring(entry->args);
548 sync_kloop_rx_irq_wake_fun(wait_queue_t *wait, unsigned mode,
549 int wake_flags, void *key)
551 struct sync_kloop_poll_entry *entry =
552 container_of(wait, struct sync_kloop_poll_entry, wait);
553 struct sync_kloop_poll_ctx *poll_ctx = entry->parent;
556 for (i = poll_ctx->num_tx_rings; i < poll_ctx->num_rings; i++) {
557 struct eventfd_ctx *irq_ctx = poll_ctx->entries[i].irq_ctx;
560 eventfd_signal(irq_ctx, 1);
566 #endif /* SYNC_KLOOP_POLL */
569 netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr)
571 struct nmreq_sync_kloop_start *req =
572 (struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body;
573 struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL;
574 #ifdef SYNC_KLOOP_POLL
575 struct sync_kloop_poll_ctx *poll_ctx = NULL;
576 #endif /* SYNC_KLOOP_POLL */
577 int num_rx_rings, num_tx_rings, num_rings;
578 struct sync_kloop_ring_args *args = NULL;
579 uint32_t sleep_us = req->sleep_us;
580 struct nm_csb_atok* csb_atok_base;
581 struct nm_csb_ktoa* csb_ktoa_base;
582 struct netmap_adapter *na;
583 struct nmreq_option *opt;
584 bool na_could_sleep = false;
585 bool busy_wait = true;
586 bool direct_tx = false;
587 bool direct_rx = false;
591 if (sleep_us > 1000000) {
592 /* We do not accept sleeping for more than a second. */
596 if (priv->np_nifp == NULL) {
599 mb(); /* make sure following reads are not from cache */
602 if (!nm_netmap_on(na)) {
607 /* Make sure the application is working in CSB mode. */
608 if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) {
610 nm_prerr("sync-kloop on %s requires "
611 "NETMAP_REQ_OPT_CSB option", na->name);
615 csb_atok_base = priv->np_csb_atok_base;
616 csb_ktoa_base = priv->np_csb_ktoa_base;
618 /* Make sure that no kloop is currently running. */
619 if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
622 priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING;
628 num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX];
629 num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX];
630 num_rings = num_tx_rings + num_rx_rings;
632 args = nm_os_malloc(num_rings * sizeof(args[0]));
638 /* Prepare the arguments for netmap_sync_kloop_tx_ring()
639 * and netmap_sync_kloop_rx_ring(). */
640 for (i = 0; i < num_tx_rings; i++) {
641 struct sync_kloop_ring_args *a = args + i;
643 a->kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]];
644 a->csb_atok = csb_atok_base + i;
645 a->csb_ktoa = csb_ktoa_base + i;
646 a->busy_wait = busy_wait;
647 a->direct = direct_tx;
649 for (i = 0; i < num_rx_rings; i++) {
650 struct sync_kloop_ring_args *a = args + num_tx_rings + i;
652 a->kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]];
653 a->csb_atok = csb_atok_base + num_tx_rings + i;
654 a->csb_ktoa = csb_ktoa_base + num_tx_rings + i;
655 a->busy_wait = busy_wait;
656 a->direct = direct_rx;
659 /* Validate notification options. */
660 opt = nmreq_getoption(hdr, NETMAP_REQ_OPT_SYNC_KLOOP_MODE);
662 struct nmreq_opt_sync_kloop_mode *mode_opt =
663 (struct nmreq_opt_sync_kloop_mode *)opt;
665 direct_tx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_TX);
666 direct_rx = !!(mode_opt->mode & NM_OPT_SYNC_KLOOP_DIRECT_RX);
667 if (mode_opt->mode & ~(NM_OPT_SYNC_KLOOP_DIRECT_TX |
668 NM_OPT_SYNC_KLOOP_DIRECT_RX)) {
669 opt->nro_status = err = EINVAL;
674 opt = nmreq_getoption(hdr, NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS);
676 if (opt->nro_size != sizeof(*eventfds_opt) +
677 sizeof(eventfds_opt->eventfds[0]) * num_rings) {
678 /* Option size not consistent with the number of
680 opt->nro_status = err = EINVAL;
683 #ifdef SYNC_KLOOP_POLL
684 eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt;
687 /* Check if some ioeventfd entry is not defined, and force sleep
688 * synchronization in that case. */
690 for (i = 0; i < num_rings; i++) {
691 if (eventfds_opt->eventfds[i].ioeventfd < 0) {
697 if (busy_wait && (direct_tx || direct_rx)) {
698 /* For direct processing we need all the
699 * ioeventfds to be valid. */
700 opt->nro_status = err = EINVAL;
704 /* We need 2 poll entries for TX and RX notifications coming
705 * from the netmap adapter, plus one entry per ring for the
706 * notifications coming from the application. */
707 poll_ctx = nm_os_malloc(sizeof(*poll_ctx) +
708 (num_rings + 2) * sizeof(poll_ctx->entries[0]));
709 init_poll_funcptr(&poll_ctx->wait_table,
710 sync_kloop_poll_table_queue_proc);
711 poll_ctx->num_entries = 2 + num_rings;
712 poll_ctx->num_tx_rings = num_tx_rings;
713 poll_ctx->num_rings = num_rings;
714 poll_ctx->next_entry = 0;
715 poll_ctx->next_wake_fun = NULL;
717 if (direct_tx && (na->na_flags & NAF_BDG_MAYSLEEP)) {
718 /* In direct mode, VALE txsync is called from
719 * wake-up context, where it is not possible
722 na->na_flags &= ~NAF_BDG_MAYSLEEP;
723 na_could_sleep = true;
726 for (i = 0; i < num_rings + 2; i++) {
727 poll_ctx->entries[i].args = args + i;
728 poll_ctx->entries[i].parent = poll_ctx;
731 /* Poll for notifications coming from the applications through
733 for (i = 0; i < num_rings; i++, poll_ctx->next_entry++) {
734 struct eventfd_ctx *irq = NULL;
735 struct file *filp = NULL;
737 bool tx_ring = (i < num_tx_rings);
739 if (eventfds_opt->eventfds[i].irqfd >= 0) {
741 eventfds_opt->eventfds[i].irqfd);
746 irq = eventfd_ctx_fileget(filp);
752 poll_ctx->entries[i].irq_filp = filp;
753 poll_ctx->entries[i].irq_ctx = irq;
754 poll_ctx->entries[i].args->busy_wait = busy_wait;
755 /* Don't let netmap_sync_kloop_*x_ring() use
756 * IRQs in direct mode. */
757 poll_ctx->entries[i].args->irq_ctx =
758 ((tx_ring && direct_tx) ||
759 (!tx_ring && direct_rx)) ? NULL :
760 poll_ctx->entries[i].irq_ctx;
761 poll_ctx->entries[i].args->direct =
762 (tx_ring ? direct_tx : direct_rx);
766 eventfds_opt->eventfds[i].ioeventfd);
771 if (tx_ring && direct_tx) {
772 /* Override the wake up function
773 * so that it can directly call
774 * netmap_sync_kloop_tx_ring().
776 poll_ctx->next_wake_fun =
777 sync_kloop_tx_kick_wake_fun;
778 } else if (!tx_ring && direct_rx) {
779 /* Same for direct RX. */
780 poll_ctx->next_wake_fun =
781 sync_kloop_rx_kick_wake_fun;
783 poll_ctx->next_wake_fun = NULL;
785 mask = filp->f_op->poll(filp,
786 &poll_ctx->wait_table);
787 if (mask & POLLERR) {
794 /* Poll for notifications coming from the netmap rings bound to
795 * this file descriptor. */
798 /* In direct mode, override the wake up function so
799 * that it can forward the netmap_tx_irq() to the
801 poll_ctx->next_wake_fun = direct_tx ?
802 sync_kloop_tx_irq_wake_fun : NULL;
803 poll_wait(priv->np_filp, priv->np_si[NR_TX],
804 &poll_ctx->wait_table);
805 poll_ctx->next_entry++;
807 poll_ctx->next_wake_fun = direct_rx ?
808 sync_kloop_rx_irq_wake_fun : NULL;
809 poll_wait(priv->np_filp, priv->np_si[NR_RX],
810 &poll_ctx->wait_table);
811 poll_ctx->next_entry++;
814 #else /* SYNC_KLOOP_POLL */
815 opt->nro_status = EOPNOTSUPP;
817 #endif /* SYNC_KLOOP_POLL */
820 nm_prinf("kloop busy_wait %u, direct_tx %u, direct_rx %u, "
821 "na_could_sleep %u", busy_wait, direct_tx, direct_rx,
826 if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) {
830 #ifdef SYNC_KLOOP_POLL
832 /* It is important to set the task state as
833 * interruptible before processing any TX/RX ring,
834 * so that if a notification on ring Y comes after
835 * we have processed ring Y, but before we call
836 * schedule(), we don't miss it. This is true because
837 * the wake up function will change the task state,
838 * and therefore the schedule_timeout() call below
839 * will observe the change.
841 set_current_state(TASK_INTERRUPTIBLE);
843 #endif /* SYNC_KLOOP_POLL */
845 /* Process all the TX rings bound to this file descriptor. */
846 for (i = 0; !direct_tx && i < num_tx_rings; i++) {
847 struct sync_kloop_ring_args *a = args + i;
848 netmap_sync_kloop_tx_ring(a);
851 /* Process all the RX rings bound to this file descriptor. */
852 for (i = 0; !direct_rx && i < num_rx_rings; i++) {
853 struct sync_kloop_ring_args *a = args + num_tx_rings + i;
854 netmap_sync_kloop_rx_ring(a);
858 /* Default synchronization method: sleep for a while. */
859 usleep_range(sleep_us, sleep_us);
861 #ifdef SYNC_KLOOP_POLL
863 /* Yield to the scheduler waiting for a notification
864 * to come either from netmap or the application. */
865 schedule_timeout(msecs_to_jiffies(3000));
867 #endif /* SYNC_KLOOP_POLL */
870 #ifdef SYNC_KLOOP_POLL
872 /* Stop polling from netmap and the eventfds, and deallocate
873 * the poll context. */
875 __set_current_state(TASK_RUNNING);
877 for (i = 0; i < poll_ctx->next_entry; i++) {
878 struct sync_kloop_poll_entry *entry =
879 poll_ctx->entries + i;
882 remove_wait_queue(entry->wqh, &entry->wait);
883 /* We got a reference to the eventfds that must be dropped, but
884 * don't do that on netmap file descriptors (since
885 * a reference was not taken). */
886 if (entry->filp && entry->filp != priv->np_filp)
889 eventfd_ctx_put(entry->irq_ctx);
891 fput(entry->irq_filp);
893 nm_os_free(poll_ctx);
896 #endif /* SYNC_KLOOP_POLL */
903 /* Reset the kloop state. */
905 priv->np_kloop_state = 0;
906 if (na_could_sleep) {
907 na->na_flags |= NAF_BDG_MAYSLEEP;
915 netmap_sync_kloop_stop(struct netmap_priv_d *priv)
917 struct netmap_adapter *na;
921 if (priv->np_nifp == NULL) {
924 mb(); /* make sure following reads are not from cache */
927 if (!nm_netmap_on(na)) {
931 /* Set the kloop stopping flag. */
933 priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING;
936 /* Send a notification to the kloop, in case it is blocked in
937 * schedule_timeout(). We can use either RX or TX, because the
938 * kloop is waiting on both. */
939 nm_os_selwakeup(priv->np_si[NR_RX]);
941 /* Wait for the kloop to actually terminate. */
943 usleep_range(1000, 1500);
945 running = (NM_ACCESS_ONCE(priv->np_kloop_state)
946 & NM_SYNC_KLOOP_RUNNING);
955 * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers.
956 * These routines are reused across the different operating systems supported
961 * Reconcile host and guest views of the transmit ring.
963 * Guest user wants to transmit packets up to the one before ring->head,
964 * and guest kernel knows tx_ring->hwcur is the first packet unsent
965 * by the host kernel.
967 * We push out as many packets as possible, and possibly
968 * reclaim buffers from previously completed transmission.
970 * Notifications from the host are enabled only if the user guest would
971 * block (no space in the ring).
974 netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
975 struct netmap_kring *kring, int flags)
979 /* Disable notifications */
980 atok->appl_need_kick = 0;
983 * First part: tell the host to process the new packets,
986 kring->nr_hwcur = ktoa->hwcur;
987 nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);
989 /* Ask for a kick from a guest to the host if needed. */
990 if (((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring))
991 && NM_ACCESS_ONCE(ktoa->kern_need_kick)) ||
992 (flags & NAF_FORCE_RECLAIM)) {
993 atok->sync_flags = flags;
998 * Second part: reclaim buffers for completed transmissions.
1000 if (nm_kr_wouldblock(kring) || (flags & NAF_FORCE_RECLAIM)) {
1001 nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
1006 * No more room in the ring for new transmissions. The user thread will
1007 * go to sleep and we need to be notified by the host when more free
1008 * space is available.
1010 if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
1011 /* Re-enable notifications. */
1012 atok->appl_need_kick = 1;
1013 /* Double check, with store-load memory barrier. */
1015 nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
1017 /* If there is new free space, disable notifications */
1018 if (unlikely(!nm_kr_wouldblock(kring))) {
1019 atok->appl_need_kick = 0;
1023 nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
1024 kring->name, atok->head, atok->cur, ktoa->hwtail,
1025 kring->rhead, kring->rcur, kring->nr_hwtail);
1031 * Reconcile host and guest view of the receive ring.
1033 * Update hwcur/hwtail from host (reading from CSB).
1035 * If guest user has released buffers up to the one before ring->head, we
1036 * also give them to the host.
1038 * Notifications from the host are enabled only if the user guest would
1039 * block (no more completed slots in the ring).
1042 netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
1043 struct netmap_kring *kring, int flags)
1045 bool notify = false;
1047 /* Disable notifications */
1048 atok->appl_need_kick = 0;
1051 * First part: import newly received packets, by updating the kring
1052 * hwtail to the hwtail known from the host (read from the CSB).
1053 * This also updates the kring hwcur.
1055 nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur);
1056 kring->nr_kflags &= ~NKR_PENDINTR;
1059 * Second part: tell the host about the slots that guest user has
1060 * released, by updating cur and head in the CSB.
1062 if (kring->rhead != kring->nr_hwcur) {
1063 nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead);
1067 * No more completed RX slots. The user thread will go to sleep and
1068 * we need to be notified by the host when more RX slots have been
1071 if (nm_kr_wouldblock(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
1072 /* Re-enable notifications. */
1073 atok->appl_need_kick = 1;
1074 /* Double check, with store-load memory barrier. */
1076 nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail,
1078 /* If there are new slots, disable notifications. */
1079 if (!nm_kr_wouldblock(kring)) {
1080 atok->appl_need_kick = 0;
1084 /* Ask for a kick from the guest to the host if needed. */
1085 if ((kring->rhead != kring->nr_hwcur || nm_kr_wouldblock(kring))
1086 && NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
1087 atok->sync_flags = flags;
1091 nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
1092 kring->name, atok->head, atok->cur, ktoa->hwtail,
1093 kring->rhead, kring->rcur, kring->nr_hwtail);
1099 * Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor.
1102 ptnet_nm_krings_create(struct netmap_adapter *na)
1104 struct netmap_pt_guest_adapter *ptna =
1105 (struct netmap_pt_guest_adapter *)na; /* Upcast. */
1106 struct netmap_adapter *na_nm = &ptna->hwup.up;
1107 struct netmap_adapter *na_dr = &ptna->dr.up;
1110 if (ptna->backend_users) {
1114 /* Create krings on the public netmap adapter. */
1115 ret = netmap_hw_krings_create(na_nm);
1120 /* Copy krings into the netmap adapter private to the driver. */
1121 na_dr->tx_rings = na_nm->tx_rings;
1122 na_dr->rx_rings = na_nm->rx_rings;
1128 ptnet_nm_krings_delete(struct netmap_adapter *na)
1130 struct netmap_pt_guest_adapter *ptna =
1131 (struct netmap_pt_guest_adapter *)na; /* Upcast. */
1132 struct netmap_adapter *na_nm = &ptna->hwup.up;
1133 struct netmap_adapter *na_dr = &ptna->dr.up;
1135 if (ptna->backend_users) {
1139 na_dr->tx_rings = NULL;
1140 na_dr->rx_rings = NULL;
1142 netmap_hw_krings_delete(na_nm);
1146 ptnet_nm_dtor(struct netmap_adapter *na)
1148 struct netmap_pt_guest_adapter *ptna =
1149 (struct netmap_pt_guest_adapter *)na;
1151 netmap_mem_put(ptna->dr.up.nm_mem);
1152 memset(&ptna->dr, 0, sizeof(ptna->dr));
1153 netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp);
1157 netmap_pt_guest_attach(struct netmap_adapter *arg,
1158 unsigned int nifp_offset, unsigned int memid)
1160 struct netmap_pt_guest_adapter *ptna;
1161 struct ifnet *ifp = arg ? arg->ifp : NULL;
1165 arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
1166 if (arg->nm_mem == NULL)
1168 arg->na_flags |= NAF_MEM_OWNER;
1169 error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1);
1173 /* get the netmap_pt_guest_adapter */
1174 ptna = (struct netmap_pt_guest_adapter *) NA(ifp);
1176 /* Initialize a separate pass-through netmap adapter that is going to
1177 * be used by the ptnet driver only, and so never exposed to netmap
1178 * applications. We only need a subset of the available fields. */
1179 memset(&ptna->dr, 0, sizeof(ptna->dr));
1180 ptna->dr.up.ifp = ifp;
1181 ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
1182 ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
1184 ptna->backend_users = 0;
1189 #endif /* WITH_PTNETMAP */