2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (C) 2014-2016 Giuseppe Lettieri
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #if defined(__FreeBSD__)
32 #include <sys/cdefs.h> /* prerequisite */
34 #include <sys/types.h>
35 #include <sys/errno.h>
36 #include <sys/param.h> /* defines used in kernel.h */
37 #include <sys/kernel.h> /* types used in module initialization */
38 #include <sys/malloc.h>
41 #include <sys/rwlock.h>
42 #include <sys/selinfo.h>
43 #include <sys/sysctl.h>
44 #include <sys/socket.h> /* sockaddrs */
46 #include <net/if_var.h>
47 #include <machine/bus.h> /* bus_dmamap_* */
48 #include <sys/refcount.h>
55 #elif defined(__APPLE__)
57 #warning OSX support is only partial
65 #error Unsupported platform
67 #endif /* unsupported */
73 #include <net/netmap.h>
74 #include <dev/netmap/netmap_kern.h>
75 #include <dev/netmap/netmap_mem2.h>
/*
 * Upper bounds for the geometry of a pipe endpoint. netmap_get_pipe_na()
 * passes them to nm_bound_var() as the maximum number of slots per ring
 * (NM_PIPE_MAXSLOTS) and of rings per direction (NM_PIPE_MAXRINGS).
 */
79 #define NM_PIPE_MAXSLOTS 4096
80 #define NM_PIPE_MAXRINGS 256
/*
 * Integer sysctl "default_pipes" under the _dev_netmap node, registered
 * read-write (CTLFLAG_RW). Nothing visible in this file reads the variable.
 */
82 static int netmap_default_pipes = 0; /* ignored, kept for compatibility */
84 SYSCTL_DECL(_dev_netmap);
85 SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW,
86 &netmap_default_pipes, 0, "For compatibility only");
89 /* allocate the pipe array in the parent adapter */
/*
 * nm_pipe_alloc: grow the array of pipe pointers kept in a parent adapter.
 *
 * na     - parent adapter whose na_pipes array is resized
 * npipes - requested capacity, in entries
 *
 * Two checks come first: a request that does not exceed the current
 * capacity (na_max_pipes), and a request that is smaller than na_next_pipe
 * or larger than NM_MAXPIPES. Otherwise the array is resized with
 * nm_os_realloc() from na_max_pipes to npipes pointers and na_max_pipes is
 * set to the new capacity.
 * NOTE(review): the statements executed when the two checks fire, the
 * handling of a failed nm_os_realloc() and the store of npa back into
 * na->na_pipes are not visible in this excerpt - confirm in the full file.
 */
91 nm_pipe_alloc(struct netmap_adapter *na, u_int npipes)
94 struct netmap_pipe_adapter **npa;
96 if (npipes <= na->na_max_pipes)
97 /* we already have more entries than requested */
100 if (npipes < na->na_next_pipe || npipes > NM_MAXPIPES)
/* byte sizes of the current and of the requested pointer array */
103 old_len = sizeof(struct netmap_pipe_adapter *)*na->na_max_pipes;
104 len = sizeof(struct netmap_pipe_adapter *) * npipes;
105 npa = nm_os_realloc(na->na_pipes, len, old_len);
110 na->na_max_pipes = npipes;
115 /* deallocate the pipe array in the parent adapter */
/*
 * netmap_pipe_dealloc: release the pipe pointer array of adapter na.
 *
 * A diagnostic is emitted through D() when na_next_pipe is still greater
 * than zero (entries are still recorded in the array). The array is
 * handed to nm_os_free() and both counters, na_max_pipes and na_next_pipe,
 * are reset to 0.
 * NOTE(review): the remaining D() arguments and any reset of na->na_pipes
 * itself are not visible in this excerpt - confirm in the full file.
 */
117 netmap_pipe_dealloc(struct netmap_adapter *na)
120 if (na->na_next_pipe > 0) {
121 D("freeing not empty pipe array for %s (%d dangling pipes)!", na->name,
124 nm_os_free(na->na_pipes);
126 na->na_max_pipes = 0;
127 na->na_next_pipe = 0;
131 /* find a pipe endpoint with the given id among the parent's pipes */
/*
 * netmap_pipe_find: linear search of the parent's pipe array by pipe id.
 *
 * parent  - adapter whose na_pipes[0 .. na_next_pipe-1] entries are scanned
 * pipe_id - NUL-terminated id to look for
 *
 * For each entry the last '{' (master role) or '}' (any other role) in the
 * endpoint name is located with strrchr(); KASSERT requires that it exists.
 * The text found there is then compared with pipe_id using strcmp().
 * NOTE(review): the statement between the KASSERT and the strcmp() (which
 * presumably steps past the bracket) and the return statements are not
 * visible in this excerpt - confirm in the full file.
 */
132 static struct netmap_pipe_adapter *
133 netmap_pipe_find(struct netmap_adapter *parent, const char *pipe_id)
136 struct netmap_pipe_adapter *na;
138 for (i = 0; i < parent->na_next_pipe; i++) {
139 const char *na_pipe_id;
140 na = parent->na_pipes[i];
/* the bracket that separates parent name and pipe id depends on the role */
141 na_pipe_id = strrchr(na->up.name,
142 na->role == NM_PIPE_ROLE_MASTER ? '{' : '}');
143 KASSERT(na_pipe_id != NULL, ("Invalid pipe name"));
145 if (!strcmp(na_pipe_id, pipe_id)) {
152 /* add a new pipe endpoint to the parent array */
/*
 * netmap_pipe_add: append endpoint na to the parent's pipe array.
 *
 * When the array is full (na_next_pipe >= na_max_pipes) its capacity is
 * first grown through nm_pipe_alloc(): doubled, or set to 2 when it is
 * currently 0. The endpoint is then stored at index na_next_pipe, that
 * index is recorded in na->parent_slot, and na_next_pipe is incremented.
 * NOTE(review): the handling of a non-zero result from nm_pipe_alloc() and
 * the function's return statements are not visible in this excerpt.
 */
154 netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
156 if (parent->na_next_pipe >= parent->na_max_pipes) {
157 u_int npipes = parent->na_max_pipes ? 2*parent->na_max_pipes : 2;
158 int error = nm_pipe_alloc(parent, npipes);
163 parent->na_pipes[parent->na_next_pipe] = na;
164 na->parent_slot = parent->na_next_pipe;
165 parent->na_next_pipe++;
169 /* remove the given pipe endpoint from the parent array */
/*
 * netmap_pipe_remove: take endpoint na out of the parent's pipe array.
 *
 * The used-entry count na_next_pipe is decremented and the resulting
 * index n designates the last used slot. If na did not occupy that slot,
 * the entry stored there is moved into na's slot and its parent_slot is
 * updated, so the used part of the array stays contiguous. Slot n is then
 * cleared.
 */
171 netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
174 n = --parent->na_next_pipe;
175 if (n != na->parent_slot) {
/* fill the hole with the last entry and fix its back-index */
176 struct netmap_pipe_adapter **p =
177 &parent->na_pipes[na->parent_slot];
178 *p = parent->na_pipes[n];
179 (*p)->parent_slot = na->parent_slot;
181 parent->na_pipes[n] = NULL;
/*
 * netmap_pipe_txsync: txsync callback for a pipe TX ring.
 *
 * txkring - transmitting kring; its peer RX kring is txkring->pipe
 * flags   - only used in the debug trace in the visible code
 *
 * m is the distance from nr_hwcur to rhead, i.e. the slots newly made
 * available by the sender. The loop walks those slots starting at
 * nr_hwcur, using the same index k in both rings; for a TX slot flagged
 * NS_BUF_CHANGED the buffer index is copied to the RX slot, the flag is
 * raised on the RX slot and cleared on the TX slot. After a memory barrier
 * the final k is published as the peer's nr_hwtail and as our nr_hwcur,
 * and the peer is notified through its nm_notify callback.
 * NOTE(review): the wrap-around test guarding "m += nkr_num_slots", the
 * early exit for m == 0, any further per-slot copy and the return value
 * are not visible in this excerpt - confirm in the full file.
 */
185 netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
187 struct netmap_kring *rxkring = txkring->pipe;
188 u_int k, lim = txkring->nkr_num_slots - 1;
189 int m; /* slots to transfer */
190 struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring;
192 ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
193 ND(20, "TX before: hwcur %d hwtail %d cur %d head %d tail %d",
194 txkring->nr_hwcur, txkring->nr_hwtail,
195 txkring->rcur, txkring->rhead, txkring->rtail);
197 m = txkring->rhead - txkring->nr_hwcur; /* new slots */
199 m += txkring->nkr_num_slots;
202 /* nothing to send */
206 for (k = txkring->nr_hwcur; m; m--, k = nm_next(k, lim)) {
207 struct netmap_slot *rs = &rxring->slot[k];
208 struct netmap_slot *ts = &txring->slot[k];
/* propagate a buffer swap done by the sender to the receiving slot */
213 if (ts->flags & NS_BUF_CHANGED) {
214 rs->buf_idx = ts->buf_idx;
215 rs->flags |= NS_BUF_CHANGED;
216 ts->flags &= ~NS_BUF_CHANGED;
220 mb(); /* make sure the slots are updated before publishing them */
221 rxkring->nr_hwtail = k;
222 txkring->nr_hwcur = k;
224 ND(20, "TX after : hwcur %d hwtail %d cur %d head %d tail %d k %d",
225 txkring->nr_hwcur, txkring->nr_hwtail,
226 txkring->rcur, txkring->rhead, txkring->rtail, k);
228 rxkring->nm_notify(rxkring, 0);
/*
 * netmap_pipe_rxsync: rxsync callback for a pipe RX ring.
 *
 * rxkring - receiving kring; its peer TX kring is rxkring->pipe
 * flags   - only used in the debug trace in the visible code
 *
 * m is the distance from nr_hwcur to rhead, i.e. the slots released by
 * the receiver. The loop walks those slots starting at nr_hwcur with the
 * same index k in both rings and clears NS_BUF_CHANGED on RX slots that
 * carry it. After a memory barrier the peer's nr_hwtail is set to the
 * slot preceding k, our nr_hwcur is set to k, and the peer is notified
 * through its nm_notify callback.
 * NOTE(review): the wrap-around test guarding "m += nkr_num_slots", the
 * early exit for m == 0, the slot copy announced by the in-loop comment
 * and the return value are not visible in this excerpt.
 */
234 netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
236 struct netmap_kring *txkring = rxkring->pipe;
237 u_int k, lim = rxkring->nkr_num_slots - 1;
238 int m; /* slots to release */
239 struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring;
241 ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
242 ND(20, "RX before: hwcur %d hwtail %d cur %d head %d tail %d",
243 rxkring->nr_hwcur, rxkring->nr_hwtail,
244 rxkring->rcur, rxkring->rhead, rxkring->rtail);
246 m = rxkring->rhead - rxkring->nr_hwcur; /* released slots */
248 m += rxkring->nkr_num_slots;
251 /* nothing to release */
255 for (k = rxkring->nr_hwcur; m; m--, k = nm_next(k, lim)) {
256 struct netmap_slot *rs = &rxring->slot[k];
257 struct netmap_slot *ts = &txring->slot[k];
259 if (rs->flags & NS_BUF_CHANGED) {
260 /* copy the slot and report the buffer change */
262 rs->flags &= ~NS_BUF_CHANGED;
266 mb(); /* make sure the slots are updated before publishing them */
/* the sender may reuse everything up to the slot just before k */
267 txkring->nr_hwtail = nm_prev(k, lim);
268 rxkring->nr_hwcur = k;
270 ND(20, "RX after : hwcur %d hwtail %d cur %d head %d tail %d k %d",
271 rxkring->nr_hwcur, rxkring->nr_hwtail,
272 rxkring->rcur, rxkring->rhead, rxkring->rtail, k);
274 txkring->nm_notify(txkring, 0);
279 /* Pipe endpoints are created and destroyed together, so that endpoints do not
280 * have to check for the existence of their peer at each ?xsync.
282 * To play well with the existing netmap infrastructure (refcounts etc.), we
283 * adopt the following strategy:
285 * 1) The first endpoint that is created also creates the other endpoint and
286 * grabs a reference to it.
288 * state A) user1 --> endpoint1 --> endpoint2
290 * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
291 * its reference to the user:
293 * state B) user1 --> endpoint1 endpoint2 <--- user2
295 * 3) Assume that, starting from state B endpoint2 is closed. In the unregister
296 * callback endpoint2 notes that endpoint1 is still active and adds a reference
297 * from endpoint1 to itself. When user2 then releases her own reference,
298 * endpoint2 is not destroyed and we are back to state A. A symmetrical state
299 * would be reached if endpoint1 were released instead.
301 * 4) If, starting from state A, endpoint1 is closed, the destructor notes that
302 * it owns a reference to endpoint2 and releases it.
304 * Something similar goes on for the creation and destruction of the krings.
308 /* netmap_pipe_krings_create.
310 * There are two cases:
316 * and we are e1. We have to create both sets
323 * and we are e2. e1 is certainly registered and our
324 * krings already exist. Nothing to do.
/*
 * netmap_pipe_krings_create: krings-create callback of a pipe endpoint.
 *
 * na is the endpoint being set up; ona is the adapter of its peer
 * (pna->peer->up). In the path visible here the krings of both ends are
 * created with netmap_krings_create(). Then ring i of type t on na and
 * ring i of the swapped type on ona are linked to each other through
 * their ->pipe pointers, and the peer-side rings get NKR_FAKERING.
 * NOTE(review): the condition that selects this path, the outer loop over
 * ring types, the error checks and the return statements are not visible
 * in this excerpt; the trailing netmap_krings_delete(na) is presumably the
 * unwind after a failure on the peer side - confirm in the full file.
 */
327 netmap_pipe_krings_create(struct netmap_adapter *na)
329 struct netmap_pipe_adapter *pna =
330 (struct netmap_pipe_adapter *)na;
331 struct netmap_adapter *ona = &pna->peer->up;
339 ND("%p: case 1, create both ends", na);
340 error = netmap_krings_create(na, 0);
344 /* create the krings of the other end */
345 error = netmap_krings_create(ona, 0);
349 /* cross link the krings */
351 enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
352 for (i = 0; i < nma_get_nrings(na, t); i++) {
353 NMR(na, t)[i]->pipe = NMR(ona, r)[i];
354 NMR(ona, r)[i]->pipe = NMR(na, t)[i];
355 /* mark all peer-adapter rings as fake */
356 NMR(ona, r)[i]->nr_kflags |= NKR_FAKERING;
364 netmap_krings_delete(na);
371 * There are two cases on registration (onoff==1)
377 * and we are e1. Create the needed rings of the
382 * usr1 --> e1 --> e2 <-- usr2
384 * and we are e2. Drop the ref e1 is holding.
386 * There are two additional cases on unregister (onoff==0)
392 * and we are e1. Nothing special to do, e2 will
393 * be cleaned up by the destructor of e1.
397 * usr1 --> e1 e2 <-- usr2
399 * and we are either e1 or e2. Add a ref from the
/*
 * netmap_pipe_reg: register/unregister callback of a pipe endpoint.
 *
 * na    - endpoint being switched; its peer adapter is pna->peer->up
 * onoff - registration when 1, unregister when 0 (per the file's own
 *         description of the cases)
 *
 * Registration, as far as visible here: every ring of na that is pending
 * on flags its peer ring NKR_NEEDRING, netmap_mem_rings_create() is called
 * on the peer adapter, and for each pending-on ring the slot array is
 * copied with memcpy() from a source ring to a destination ring chosen
 * according to NKR_FAKERING, after which the ring's nr_mode becomes
 * NKR_NETMAP_ON. NAF_NETMAP_ON is set on na when active_fds is 0.
 * Unregister: NAF_NETMAP_ON is cleared when active_fds is 0 and every
 * pending-off ring gets nr_mode NKR_NETMAP_OFF.
 * The tail of the function updates pna->peer->peer_ref together with a
 * netmap_adapter_put()/netmap_adapter_get() on na (cases 1.b and 2.b in the
 * trace messages).
 * NOTE(review): the onoff test, the outer loops over ring types, the
 * sring/dring assignments, the error checks and the return statements are
 * not visible in this excerpt - confirm in the full file.
 */
403 netmap_pipe_reg(struct netmap_adapter *na, int onoff)
405 struct netmap_pipe_adapter *pna =
406 (struct netmap_pipe_adapter *)na;
407 struct netmap_adapter *ona = &pna->peer->up;
411 ND("%p: onoff %d", na, onoff);
414 for (i = 0; i < nma_get_nrings(na, t); i++) {
415 struct netmap_kring *kring = NMR(na, t)[i];
417 if (nm_kring_pending_on(kring)) {
418 /* mark the peer ring as needed */
419 kring->pipe->nr_kflags |= NKR_NEEDRING;
424 /* create all missing needed rings on the other end.
425 * Either our end, or the other, has been marked as
426 * fake, so the allocation will not be done twice.
428 error = netmap_mem_rings_create(ona);
432 /* In case of no error we put our rings in netmap mode */
/* NOTE(review): this loop (and the unregister loop further down) visits one
 * entry more than the first loop, which stops at nma_get_nrings(na, t).
 * Confirm that the extra entry is intended and that it is safe to
 * dereference for a pipe adapter. */
434 for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
435 struct netmap_kring *kring = NMR(na, t)[i];
436 if (nm_kring_pending_on(kring)) {
437 struct netmap_kring *sring, *dring;
439 /* copy the buffers from the non-fake ring */
440 if (kring->nr_kflags & NKR_FAKERING) {
447 memcpy(dring->ring->slot,
449 sizeof(struct netmap_slot) *
450 sring->nkr_num_slots);
451 /* mark both rings as fake and needed,
452 * so that buffers will not be
453 * deleted by the standard machinery
454 * (we will delete them by ourselves in
455 * netmap_pipe_krings_delete)
458 (NKR_FAKERING | NKR_NEEDRING);
460 (NKR_FAKERING | NKR_NEEDRING);
461 kring->nr_mode = NKR_NETMAP_ON;
465 if (na->active_fds == 0)
466 na->na_flags |= NAF_NETMAP_ON;
468 if (na->active_fds == 0)
469 na->na_flags &= ~NAF_NETMAP_ON;
471 for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
472 struct netmap_kring *kring = NMR(na, t)[i];
474 if (nm_kring_pending_off(kring)) {
475 kring->nr_mode = NKR_NETMAP_OFF;
481 if (na->active_fds) {
482 ND("active_fds %d", na->active_fds);
487 ND("%p: case 1.a or 2.a, nothing to do", na);
491 ND("%p: case 1.b, drop peer", na);
492 pna->peer->peer_ref = 0;
493 netmap_adapter_put(na);
495 ND("%p: case 2.b, grab peer", na);
496 netmap_adapter_get(na);
497 pna->peer->peer_ref = 1;
502 /* netmap_pipe_krings_delete.
504 * There are two cases:
510 * and we are e1 (e2 is not registered, so krings_delete cannot be
515 * usr1 --> e1 e2 <-- usr2
517 * and we are either e1 or e2.
519 * In the former case we have to also delete the krings of e2;
520 * in the latter case we do nothing.
/*
 * netmap_pipe_krings_delete: krings-delete callback of a pipe endpoint.
 *
 * When pna->peer_ref is not set the endpoint is being kept alive by its
 * peer (case 2 in the trace message). Otherwise (case 1) the function
 * works on both ends: for each kring it zeroes buf_idx in the slots lying
 * between nr_hwtail and nr_hwcur (and in the slot at nr_hwtail when the
 * two indices coincide) and clears NKR_FAKERING and NKR_NEEDRING. It then
 * deletes rings and krings of na with netmap_mem_rings_delete() and
 * netmap_krings_delete(), and does the same for the peer adapter.
 * A peer whose tx_rings pointer is already NULL is recognised as already
 * deleted.
 * NOTE(review): the early return for case 2, the loops that select sna and
 * the ring type, and the statement taken when the peer is already deleted
 * are not visible in this excerpt - confirm in the full file.
 */
523 netmap_pipe_krings_delete(struct netmap_adapter *na)
525 struct netmap_pipe_adapter *pna =
526 (struct netmap_pipe_adapter *)na;
527 struct netmap_adapter *sna, *ona; /* na of the other end */
531 if (!pna->peer_ref) {
532 ND("%p: case 2, kept alive by peer", na);
535 ona = &pna->peer->up;
537 ND("%p: case 1, deleting everything", na);
538 /* To avoid double-frees we zero-out all the buffers in the kernel part
539 * of each ring. The reason is this: If the user is behaving correctly,
540 * all buffers are found in exactly one slot in the userspace part of
541 * some ring. If the user is not behaving correctly, we cannot release
542 * buffers cleanly anyway. In the latter case, the allocator will
543 * return to a clean state only when all its users will close.
548 for (i = 0; i < nma_get_nrings(sna, t) + 1; i++) {
549 struct netmap_kring *kring = NMR(sna, t)[i];
550 struct netmap_ring *ring = kring->ring;
551 uint32_t j, lim = kring->nkr_num_slots - 1;
553 ND("%s ring %p hwtail %u hwcur %u",
554 kring->name, ring, kring->nr_hwtail, kring->nr_hwcur);
559 if (kring->nr_hwtail == kring->nr_hwcur)
560 ring->slot[kring->nr_hwtail].buf_idx = 0;
562 for (j = nm_next(kring->nr_hwtail, lim);
563 j != kring->nr_hwcur;
566 ND("%s[%d] %u", kring->name, j, ring->slot[j].buf_idx);
567 ring->slot[j].buf_idx = 0;
569 kring->nr_kflags &= ~(NKR_FAKERING | NKR_NEEDRING);
573 if (sna != ona && ona->tx_rings) {
578 netmap_mem_rings_delete(na);
579 netmap_krings_delete(na); /* also zeroes tx_rings etc. */
581 if (ona->tx_rings == NULL) {
582 /* already deleted, we must be on a
583 * cleanup-after-error path */
586 netmap_mem_rings_delete(ona);
587 netmap_krings_delete(ona);
/*
 * netmap_pipe_dtor: destructor callback of a pipe endpoint.
 *
 * Visible steps: drop a reference on the peer adapter with
 * netmap_adapter_put(), remove the endpoint from the parent's pipe array
 * when its role is NM_PIPE_ROLE_MASTER (only the master is registered
 * there by netmap_get_pipe_na()), release the parent ifp with if_rele()
 * and drop the reference on the parent adapter.
 * NOTE(review): the conditions guarding the peer put and the if_rele()
 * call, and any clearing of peer_ref, are not visible in this excerpt -
 * confirm in the full file.
 */
592 netmap_pipe_dtor(struct netmap_adapter *na)
594 struct netmap_pipe_adapter *pna =
595 (struct netmap_pipe_adapter *)na;
596 ND("%p %p", na, pna->parent_ifp);
598 ND("%p: clean up peer", na);
600 netmap_adapter_put(&pna->peer->up);
602 if (pna->role == NM_PIPE_ROLE_MASTER)
603 netmap_pipe_remove(pna->parent, pna);
605 if_rele(pna->parent_ifp);
606 netmap_adapter_put(pna->parent);
/*
 * netmap_get_pipe_na: resolve (and optionally create) a pipe endpoint from
 * a register request.
 *
 * hdr    - request header; hdr->nr_name holds the port name and
 *          hdr->nr_body points to the struct nmreq_register
 * na     - output; on the success path *na receives a reference taken with
 *          netmap_adapter_get()
 * nmd    - memory descriptor forwarded to netmap_get_na()
 * create - forwarded to netmap_get_na(); also reported in the "not found"
 *          trace message
 *
 * Outline of the visible code:
 *  - the name is parsed as 'xx{yy' (master role) or 'xx}yy' (slave role),
 *    looking for the last bracket; an empty id or an empty base name is
 *    treated specially, and only NR_REG_ALL_NIC / NR_REG_ONE_NIC modes are
 *    accepted;
 *  - the parent adapter is looked up with netmap_get_na() while the pipe
 *    suffix is temporarily removed from hdr->nr_name; on ENXIO a persistent
 *    vale port is created once with netmap_vi_create() before retrying;
 *  - the pipe id is searched in the parent with netmap_pipe_find(); when an
 *    endpoint is found the extra parent reference is dropped with
 *    netmap_unget_na();
 *  - otherwise a master and a slave endpoint are allocated. Ring counts
 *    are bounded to 1..NM_PIPE_MAXRINGS and slot counts to
 *    1..NM_PIPE_MAXSLOTS, the slave swaps the tx/rx ring counts, only the
 *    master is added to the parent array, and additional references are
 *    taken on the parent and between the two endpoints.
 * NOTE(review): return type, return values, most error branches and the
 * cleanup labels are not visible in this excerpt - confirm in the full file.
 */
611 netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
612 struct netmap_mem_d *nmd, int create)
614 struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
615 struct netmap_adapter *pna; /* parent adapter */
616 struct netmap_pipe_adapter *mna, *sna, *reqna;
617 struct ifnet *ifp = NULL;
618 const char *pipe_id = NULL;
620 int error, retries = 0;
623 /* Try to parse the pipe syntax 'xx{yy' or 'xx}yy'. */
624 cbra = strrchr(hdr->nr_name, '{');
626 role = NM_PIPE_ROLE_MASTER;
628 cbra = strrchr(hdr->nr_name, '}');
630 role = NM_PIPE_ROLE_SLAVE;
637 if (*pipe_id == '\0' || cbra == hdr->nr_name) {
638 /* Bracket is the last character, so pipe name is missing;
639 * or bracket is the first character, so base port name
644 if (req->nr_mode != NR_REG_ALL_NIC && req->nr_mode != NR_REG_ONE_NIC) {
645 /* We only accept modes involving hardware rings. */
649 /* first, try to find the parent adapter */
651 char nr_name_orig[NETMAP_REQ_IFNAMSIZ];
654 /* Temporarily remove the pipe suffix. */
/* NOTE(review): strncpy() does not NUL-terminate the destination when the
 * source fills the whole buffer; this save/restore relies on hdr->nr_name
 * being terminated within its size - confirm that callers guarantee it. */
655 strncpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig));
657 error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
658 /* Restore the pipe suffix. */
659 strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
662 if (error != ENXIO || retries++) {
663 ND("parent lookup failed: %d", error);
666 ND("try to create a persistent vale port");
667 /* create a persistent vale port and try again */
670 create_error = netmap_vi_create(hdr, 1 /* autodelete */);
672 strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
673 if (create_error && create_error != EEXIST) {
674 if (create_error != EOPNOTSUPP) {
675 D("failed to create a persistent vale port: %d", create_error);
681 if (NETMAP_OWNED_BY_KERN(pna)) {
687 /* next, lookup the pipe id in the parent list */
689 mna = netmap_pipe_find(pna, pipe_id);
691 if (mna->role == role) {
692 ND("found %s directly at %d", pipe_id, mna->parent_slot);
695 ND("found %s indirectly at %d", pipe_id, mna->parent_slot);
698 /* the pipe we have found already holds a ref to the parent,
699 * so we need to drop the one we got from netmap_get_na()
701 netmap_unget_na(pna, ifp);
704 ND("pipe %s not found, create %d", pipe_id, create);
709 /* we create both master and slave.
710 * The endpoint we were asked for holds a reference to
713 mna = nm_os_malloc(sizeof(*mna));
718 snprintf(mna->up.name, sizeof(mna->up.name), "%s{%s", pna->name, pipe_id);
720 mna->role = NM_PIPE_ROLE_MASTER;
722 mna->parent_ifp = ifp;
724 mna->up.nm_txsync = netmap_pipe_txsync;
725 mna->up.nm_rxsync = netmap_pipe_rxsync;
726 mna->up.nm_register = netmap_pipe_reg;
727 mna->up.nm_dtor = netmap_pipe_dtor;
728 mna->up.nm_krings_create = netmap_pipe_krings_create;
729 mna->up.nm_krings_delete = netmap_pipe_krings_delete;
730 mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
731 mna->up.na_flags |= NAF_MEM_OWNER;
732 mna->up.na_lut = pna->na_lut;
734 mna->up.num_tx_rings = req->nr_tx_rings;
735 nm_bound_var(&mna->up.num_tx_rings, 1,
736 1, NM_PIPE_MAXRINGS, NULL);
737 mna->up.num_rx_rings = req->nr_rx_rings;
738 nm_bound_var(&mna->up.num_rx_rings, 1,
739 1, NM_PIPE_MAXRINGS, NULL);
740 mna->up.num_tx_desc = req->nr_tx_slots;
741 nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
742 1, NM_PIPE_MAXSLOTS, NULL);
743 mna->up.num_rx_desc = req->nr_rx_slots;
744 nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
745 1, NM_PIPE_MAXSLOTS, NULL);
746 error = netmap_attach_common(&mna->up);
749 /* register the master with the parent */
750 error = netmap_pipe_add(pna, mna);
754 /* create the slave */
/* NOTE(review): the size is taken from *mna although the result is stored
 * in sna. Both are struct netmap_pipe_adapter pointers, so the size is
 * the same, but sizeof(*sna) would be the conventional operand. */
755 sna = nm_os_malloc(sizeof(*mna));
760 /* most fields are the same, copy from master and then fix */
762 sna->up.nm_mem = netmap_mem_get(mna->up.nm_mem);
763 /* swap the number of tx/rx rings */
764 sna->up.num_tx_rings = mna->up.num_rx_rings;
765 sna->up.num_rx_rings = mna->up.num_tx_rings;
766 snprintf(sna->up.name, sizeof(sna->up.name), "%s}%s", pna->name, pipe_id);
767 sna->role = NM_PIPE_ROLE_SLAVE;
768 error = netmap_attach_common(&sna->up);
772 /* join the two endpoints */
776 /* we already have a reference to the parent, but we
777 * need another one for the other endpoint we created
779 netmap_adapter_get(pna);
780 /* likewise for the ifp, if any */
784 if (role == NM_PIPE_ROLE_MASTER) {
787 netmap_adapter_get(&sna->up);
791 netmap_adapter_get(&mna->up);
793 ND("created master %p and slave %p", mna, sna);
796 ND("pipe %s %s at %p", pipe_id,
797 (reqna->role == NM_PIPE_ROLE_MASTER ? "master" : "slave"), reqna);
799 netmap_adapter_get(*na);
801 /* keep the reference to the parent.
802 * It will be released by the req destructor
810 netmap_pipe_remove(pna, mna);
814 netmap_unget_na(pna, ifp);
819 #endif /* WITH_PIPES */