2 * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #if defined(__FreeBSD__)
29 #include <sys/cdefs.h> /* prerequisite */
31 #include <sys/types.h>
32 #include <sys/errno.h>
33 #include <sys/param.h> /* defines used in kernel.h */
34 #include <sys/kernel.h> /* types used in module initialization */
35 #include <sys/malloc.h>
38 #include <sys/rwlock.h>
39 #include <sys/selinfo.h>
40 #include <sys/sysctl.h>
41 #include <sys/socket.h> /* sockaddrs */
43 #include <net/if_var.h>
44 #include <machine/bus.h> /* bus_dmamap_* */
45 #include <sys/refcount.h>
52 #elif defined(__APPLE__)
54 #warning OSX support is only partial
59 #error Unsupported platform
61 #endif /* unsupported */
67 #include <net/netmap.h>
68 #include <dev/netmap/netmap_kern.h>
69 #include <dev/netmap/netmap_mem2.h>
/* Upper bound on the number of slots per pipe ring (see nm_bound_var
 * clamping in netmap_get_pipe_na). */
73 #define NM_PIPE_MAXSLOTS 4096
/* Tunable dev.netmap.default_pipes: number of pipes pre-allocated for each
 * NIC when the request does not specify one (see netmap_pipe_alloc). */
75 int netmap_default_pipes = 0; /* default number of pipes for each nic */
76 SYSCTL_DECL(_dev_netmap);
77 SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , "");
79 /* allocate the pipe array in the parent adapter */
/* NOTE(review): this chunk is an elided numbered listing -- the return type,
 * braces, local declarations (npipes, len, error) and several statements of
 * this function are missing from view, so only comments are added here. */
81 netmap_pipe_alloc(struct netmap_adapter *na, struct nmreq *nmr)
84 int mode = nmr->nr_flags & NR_REG_MASK;
/* A pipe endpoint does not own a pipe array itself: the request is meant
 * for the parent adapter, so just report the current maximum back. */
87 if (mode == NR_REG_PIPE_MASTER || mode == NR_REG_PIPE_SLAVE) {
88 /* this is for our parent, not for us */
92 /* TODO: we can resize the array if the new
93 * request can accommodate the already existing pipes
96 nmr->nr_arg1 = na->na_max_pipes;
/* Use the caller-requested count (nr_arg1) or fall back to the sysctl
 * default, clamped to [0, NM_MAXPIPES]. */
100 npipes = nmr->nr_arg1;
102 npipes = netmap_default_pipes;
103 nm_bound_var(&npipes, 0, 0, NM_MAXPIPES, NULL);
106 /* really zero, nothing to alloc */
/* Zeroed array of pointers to per-pipe adapters; M_NOWAIT, so failure
 * must be handled by the caller. */
110 len = sizeof(struct netmap_pipe_adapter *) * npipes;
111 na->na_pipes = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
112 if (na->na_pipes == NULL)
115 na->na_max_pipes = npipes;
116 na->na_next_pipe = 0;
/* report the number of pipes actually allocated back to the caller */
119 nmr->nr_arg1 = npipes;
124 /* deallocate the parent array in the parent adapter */
126 netmap_pipe_dealloc(struct netmap_adapter *na)
129 ND("freeing pipes for %s", NM_IFPNAME(na->ifp));
/* free(9) on NULL is a no-op, so this is safe if alloc never ran */
130 free(na->na_pipes, M_DEVBUF);
/* reset the bookkeeping so a later netmap_pipe_alloc starts clean */
132 na->na_max_pipes = 0;
133 na->na_next_pipe = 0;
137 /* find a pipe endpoint with the given id among the parent's pipes */
/* NOTE(review): the match/return lines of this function are elided from this
 * listing; presumably it returns the matching adapter or NULL -- verify. */
138 static struct netmap_pipe_adapter *
139 netmap_pipe_find(struct netmap_adapter *parent, u_int pipe_id)
142 struct netmap_pipe_adapter *na;
/* linear scan over the populated prefix of the parent's pipe array */
144 for (i = 0; i < parent->na_next_pipe; i++) {
145 na = parent->na_pipes[i];
146 if (na->id == pipe_id) {
153 /* add a new pipe endpoint to the parent array */
155 netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
/* fail if the array allocated by netmap_pipe_alloc is already full */
157 if (parent->na_next_pipe >= parent->na_max_pipes) {
158 D("%s: no space left for pipes", NM_IFPNAME(parent->ifp));
/* record our slot index so netmap_pipe_remove() can find us in O(1) */
162 parent->na_pipes[parent->na_next_pipe] = na;
163 na->parent_slot = parent->na_next_pipe;
164 parent->na_next_pipe++;
168 /* remove the given pipe endpoint from the parent array */
170 netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
/* swap-with-last removal: shrink the array and, if we were not the last
 * entry, move the last entry into our slot to keep the array dense
 * (the line updating the moved entry's parent_slot is elided here) */
173 n = --parent->na_next_pipe;
174 if (n != na->parent_slot) {
175 parent->na_pipes[na->parent_slot] =
178 parent->na_pipes[n] = NULL;
/* txsync for a pipe endpoint: move slots from our tx ring straight into the
 * peer's rx ring, then notify the peer.
 * NOTE(review): several lines are elided in this listing (the slot-swap body
 * using 'tmp', the computation of 'limit', early-return/brace lines), so the
 * comments below only cover what is visible. */
182 netmap_pipe_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
184 struct netmap_kring *txkring = na->tx_rings + ring_nr,
/* txkring->pipe is the peer's rx kring, cross-linked in krings_create */
185 *rxkring = txkring->pipe;
186 u_int limit; /* slots to transfer */
187 u_int j, k, lim_tx = txkring->nkr_num_slots - 1,
188 lim_rx = rxkring->nkr_num_slots - 1;
191 ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
192 ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d", txkring->nr_hwcur, txkring->nr_hwtail,
193 txkring->rcur, txkring->rhead, txkring->rtail);
/* j scans the destination (rx) ring, k scans the source (tx) ring */
195 j = rxkring->nr_hwtail; /* RX */
196 k = txkring->nr_hwcur; /* TX */
/* m = number of new tx slots (head - hwcur), corrected for wraparound */
197 m = txkring->rhead - txkring->nr_hwcur; /* new slots */
199 m += txkring->nkr_num_slots;
201 m = rxkring->nkr_num_slots - 1; /* max avail space on destination */
/* busy = slots already queued on the rx ring and not yet consumed,
 * again corrected for wraparound */
202 busy = j - rxkring->nr_hwcur; /* busy slots */
204 busy += txkring->nkr_num_slots;
205 m -= busy; /* subtract busy slots */
206 ND(2, "m %d limit %d", m, limit);
211 /* either the rxring is full, or nothing to send */
212 nm_txsync_finalize(txkring); /* actually useless */
/* transfer 'limit' slots; rs/ts/tmp suggest the slot descriptors are
 * swapped between the rings (zero-copy) -- swap body elided, verify.
 * The rx side is accessed via save_ring since the peer's ring pointer
 * may be hidden while it is unregistered. */
216 while (limit-- > 0) {
217 struct netmap_slot *rs = &rxkring->save_ring->slot[j];
218 struct netmap_slot *ts = &txkring->ring->slot[k];
219 struct netmap_slot tmp;
226 /* no need to report the buffer change */
228 j = nm_next(j, lim_rx);
229 k = nm_next(k, lim_tx);
232 wmb(); /* make sure the slots are updated before publishing them */
/* publish the new state: advance the peer's rx tail and our cur/tail */
233 rxkring->nr_hwtail = j;
234 txkring->nr_hwcur = k;
235 txkring->nr_hwtail = nm_prev(k, lim_tx);
237 nm_txsync_finalize(txkring);
238 ND(2, "after: hwcur %d hwtail %d cur %d head %d tail %d j %d", txkring->nr_hwcur, txkring->nr_hwtail,
239 txkring->rcur, txkring->rhead, txkring->rtail, j);
241 wmb(); /* make sure rxkring->nr_hwtail is updated before notifying */
/* wake up the receiver side of the pipe */
242 rxkring->na->nm_notify(rxkring->na, rxkring->ring_id, NR_RX, 0);
/* rxsync for a pipe endpoint: reclaim the slots the user has released and,
 * if any were freed, notify the sender side so it can make progress. */
248 netmap_pipe_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
250 struct netmap_kring *rxkring = na->rx_rings + ring_nr,
/* rxkring->pipe is the peer's tx kring, cross-linked in krings_create */
251 *txkring = rxkring->pipe;
/* remember hwcur so we only notify the peer when slots were freed */
252 uint32_t oldhwcur = rxkring->nr_hwcur;
254 ND("%s %x <- %s", rxkring->name, flags, txkring->name);
255 rxkring->nr_hwcur = rxkring->rhead; /* recover user-released slots */
256 ND(5, "hwcur %d hwtail %d cur %d head %d tail %d", rxkring->nr_hwcur, rxkring->nr_hwtail,
257 rxkring->rcur, rxkring->rhead, rxkring->rtail);
258 rmb(); /* paired with the first wmb() in txsync */
259 nm_rxsync_finalize(rxkring);
261 if (oldhwcur != rxkring->nr_hwcur) {
262 /* we have released some slots, notify the other end */
263 wmb(); /* make sure nr_hwcur is updated before notifying */
264 txkring->na->nm_notify(txkring->na, txkring->ring_id, NR_TX, 0);
269 /* Pipe endpoints are created and destroyed together, so that endpoints do not
270 * have to check for the existence of their peer at each ?xsync.
272 * To play well with the existing netmap infrastructure (refcounts etc.), we
273 * adopt the following strategy:
275 * 1) The first endpoint that is created also creates the other endpoint and
276 * grabs a reference to it.
278 * state A) user1 --> endpoint1 --> endpoint2
280 * 2) If, starting from state A, endpoint2 is then registered, endpoint1 gives
281 * its reference to the user:
283 * state B) user1 --> endpoint1 endpoint2 <--- user2
285 * 3) Assume that, starting from state B endpoint2 is closed. In the unregister
286 * callback endpoint2 notes that endpoint1 is still active and adds a reference
287 * from endpoint1 to itself. When user2 then releases her own reference,
288 * endpoint2 is not destroyed and we are back to state A. A symmetrical state
289 * would be reached if endpoint1 were released instead.
291 * 4) If, starting from state A, endpoint1 is closed, the destructor notes that
292 * it owns a reference to endpoint2 and releases it.
294 * Something similar goes on for the creation and destruction of the krings.
298 /* netmap_pipe_krings_create.
300 * There are two cases:
306 * and we are e1. We have to create both sets
313 * and we are e2. e1 is certainly registered and our
314 * krings already exist, but they may be hidden.
/* NOTE(review): braces, local declarations (i, error), return statements and
 * the goto labels of the unwind path are elided in this listing; comments
 * below only describe the visible lines. */
317 netmap_pipe_krings_create(struct netmap_adapter *na)
319 struct netmap_pipe_adapter *pna =
320 (struct netmap_pipe_adapter *)na;
/* ona = adapter of the other endpoint of the pipe */
321 struct netmap_adapter *ona = &pna->peer->up;
/* Case 1: we are the first endpoint to register; create krings and
 * rings for both ends and cross-link them. */
327 D("%p: case 1, create everything", na);
328 error = netmap_krings_create(na, 0);
332 /* we also create all the rings, since we need to
333 * update the save_ring pointers.
334 * netmap_mem_rings_create (called by our caller)
335 * will not create the rings again
338 error = netmap_mem_rings_create(na);
/* Stash the ring pointers in save_ring: the 'ring' pointers may be
 * NULLed (hidden) on unregister -- see netmap_pipe_reg -- and must be
 * recoverable later. The +1 presumably includes the host ring --
 * TODO confirm. */
342 /* update our hidden ring pointers */
343 for (i = 0; i < na->num_tx_rings + 1; i++)
344 na->tx_rings[i].save_ring = na->tx_rings[i].ring;
345 for (i = 0; i < na->num_rx_rings + 1; i++)
346 na->rx_rings[i].save_ring = na->rx_rings[i].ring;
348 /* now, create krings and rings of the other end */
349 error = netmap_krings_create(ona, 0);
353 error = netmap_mem_rings_create(ona);
357 for (i = 0; i < ona->num_tx_rings + 1; i++)
358 ona->tx_rings[i].save_ring = ona->tx_rings[i].ring;
359 for (i = 0; i < ona->num_rx_rings + 1; i++)
360 ona->rx_rings[i].save_ring = ona->rx_rings[i].ring;
/* pair each tx kring with the peer's rx kring and vice versa;
 * ?xsync reaches the other side through kring->pipe */
362 /* cross link the krings */
363 for (i = 0; i < na->num_tx_rings; i++) {
364 na->tx_rings[i].pipe = pna->peer->up.rx_rings + i;
365 na->rx_rings[i].pipe = pna->peer->up.tx_rings + i;
366 pna->peer->up.tx_rings[i].pipe = na->rx_rings + i;
367 pna->peer->up.rx_rings[i].pipe = na->tx_rings + i;
/* Case 2: our krings already exist (created by the peer) but were
 * hidden on our unregister; restore them from save_ring. */
372 /* recover the hidden rings */
373 ND("%p: case 2, hidden rings", na);
374 for (i = 0; i < na->num_tx_rings + 1; i++)
375 na->tx_rings[i].ring = na->tx_rings[i].save_ring;
376 for (i = 0; i < na->num_rx_rings + 1; i++)
377 na->rx_rings[i].ring = na->rx_rings[i].save_ring;
/* error unwind: undo in reverse order of creation (labels elided) */
382 netmap_krings_delete(ona);
384 netmap_mem_rings_delete(na);
386 netmap_krings_delete(na);
393 * There are two cases on registration (onoff==1)
399 * and we are e1. Nothing special to do.
403 * usr1 --> e1 --> e2 <-- usr2
405 * and we are e2. Drop the ref e1 is holding.
407 * There are two additional cases on unregister (onoff==0)
413 * and we are e1. Nothing special to do, e2 will
414 * be cleaned up by the destructor of e1.
418 * usr1 --> e1 e2 <-- usr2
420 * and we are either e1 or e2. Add a ref from the
421 * other end and hide our rings.
/* NOTE(review): the if/else framework, local declarations and returns of
 * this function are elided in this listing; only comments are added. */
424 netmap_pipe_reg(struct netmap_adapter *na, int onoff)
426 struct netmap_pipe_adapter *pna =
427 (struct netmap_pipe_adapter *)na;
428 struct ifnet *ifp = na->ifp;
429 ND("%p: onoff %d", na, onoff);
/* flag the (fake) interface as in/out of netmap mode */
431 ifp->if_capenable |= IFCAP_NETMAP;
433 ifp->if_capenable &= ~IFCAP_NETMAP;
436 ND("%p: case 1.a or 2.a, nothing to do", na);
/* case 1.b: the peer was holding the reference to us (state A);
 * hand it over to the registering user by dropping it here */
440 ND("%p: case 1.b, drop peer", na);
441 pna->peer->peer_ref = 0;
442 netmap_adapter_put(na);
/* case 2.b: the peer is still registered; keep ourselves alive by
 * adding a reference from the peer to us, and hide our rings so
 * netmap_mem_rings_delete will not free them (they are recovered
 * from save_ring in krings_create / krings_delete) */
445 ND("%p: case 2.b, grab peer", na);
446 netmap_adapter_get(na);
447 pna->peer->peer_ref = 1;
448 /* hide our rings from netmap_mem_rings_delete */
449 for (i = 0; i < na->num_tx_rings + 1; i++) {
450 na->tx_rings[i].ring = NULL;
452 for (i = 0; i < na->num_rx_rings + 1; i++) {
453 na->rx_rings[i].ring = NULL;
459 /* netmap_pipe_krings_delete.
461 * There are two cases:
467 * and we are e1 (e2 is not registered, so krings_delete cannot be
472 * usr1 --> e1 e2 <-- usr2
474 * and we are either e1 or e2.
476 * In the former case we have to also delete the krings of e2;
477 * in the latter case we do nothing (note that our krings
478 * have already been hidden in the unregister callback).
/* NOTE(review): braces, the loop variable declaration and returns are elided
 * in this listing; only comments are added below. */
481 netmap_pipe_krings_delete(struct netmap_adapter *na)
483 struct netmap_pipe_adapter *pna =
484 (struct netmap_pipe_adapter *)na;
485 struct netmap_adapter *ona; /* na of the other end */
/* case 2: we do not own the peer (the peer grabbed a ref to us on
 * unregister), so the krings will be deleted when the peer goes */
488 if (!pna->peer_ref) {
489 ND("%p: case 2, kept alive by peer", na);
/* case 1: we own the peer; delete both sets of krings and rings */
493 ND("%p: case 1, deleting everyhing", na);
494 netmap_krings_delete(na); /* also zeroes tx_rings etc. */
495 /* restore the ring to be deleted on the peer */
496 ona = &pna->peer->up;
497 if (ona->tx_rings == NULL) {
498 /* already deleted, we must be on a
499 * cleanup-after-error path */
/* un-hide the peer's rings (hidden by its unregister callback)
 * before handing them to netmap_mem_rings_delete */
502 for (i = 0; i < ona->num_tx_rings + 1; i++)
503 ona->tx_rings[i].ring = ona->tx_rings[i].save_ring;
504 for (i = 0; i < ona->num_rx_rings + 1; i++)
505 ona->rx_rings[i].ring = ona->rx_rings[i].save_ring;
506 netmap_mem_rings_delete(ona);
507 netmap_krings_delete(ona);
/* destructor for a pipe endpoint: release the peer (if we still own it),
 * detach from the parent adapter and free the fake ifp. */
512 netmap_pipe_dtor(struct netmap_adapter *na)
514 struct netmap_pipe_adapter *pna =
515 (struct netmap_pipe_adapter *)na;
518 ND("%p: clean up peer", na);
/* state A: we hold the reference to the peer endpoint -- drop it
 * (the peer_ref guard around this call is elided in this listing) */
520 netmap_adapter_put(&pna->peer->up);
/* only the master endpoint is registered in the parent's pipe array */
522 if (pna->role == NR_REG_PIPE_MASTER)
523 netmap_pipe_remove(pna->parent, pna);
/* drop the parent reference taken in netmap_get_pipe_na */
524 netmap_adapter_put(pna->parent);
/* the fake ifp was allocated in netmap_get_pipe_na */
525 free(na->ifp, M_DEVBUF);
/* Lookup or create a pipe endpoint for the request in nmr.
 * On success *na points to the requested endpoint (master or slave) with a
 * reference held. Creating a new pipe creates BOTH endpoints at once.
 * NOTE(review): braces, declarations (pnmr, error, pipe_id), returns and the
 * goto error labels are elided in this listing; comments only. */
531 netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
534 struct netmap_adapter *pna; /* parent adapter */
535 struct netmap_pipe_adapter *mna, *sna, *req;
536 struct ifnet *ifp, *ifp2;
538 int role = nmr->nr_flags & NR_REG_MASK;
541 ND("flags %x", nmr->nr_flags);
/* not a pipe request: let the caller try other adapter types */
543 if (role != NR_REG_PIPE_MASTER && role != NR_REG_PIPE_SLAVE) {
547 role = nmr->nr_flags & NR_REG_MASK;
549 /* first, try to find the parent adapter */
550 bzero(&pnmr, sizeof(pnmr));
551 memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ);
552 /* pass to parent the requested number of pipes */
553 pnmr.nr_arg1 = nmr->nr_arg1;
554 error = netmap_get_na(&pnmr, &pna, create);
556 ND("parent lookup failed: %d", error);
559 ND("found parent: %s", NM_IFPNAME(pna->ifp));
/* refuse to build pipes on adapters owned by the kernel */
561 if (NETMAP_OWNED_BY_KERN(pna)) {
567 /* next, lookup the pipe id in the parent list */
569 pipe_id = nmr->nr_ringid & NETMAP_RING_MASK;
570 mna = netmap_pipe_find(pna, pipe_id);
/* found: pick the endpoint matching the requested role (directly if
 * the one found matches, indirectly via its peer otherwise) */
572 if (mna->role == role) {
573 ND("found %d directly at %d", pipe_id, mna->parent_slot);
576 ND("found %d indirectly at %d", pipe_id, mna->parent_slot);
579 /* the pipe we have found already holds a ref to the parent,
580 * so we need to drop the one we got from netmap_get_na()
582 netmap_adapter_put(pna);
585 ND("pipe %d not found, create %d", pipe_id, create);
590 /* we create both master and slave.
591 * The endpoint we were asked for holds a reference to
/* fake ifp for the master endpoint; freed in netmap_pipe_dtor.
 * NOTE(review): strcpy is unbounded -- safe only if if_xname and the
 * source name are both IFNAMSIZ; consider strlcpy. */
594 ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
599 strcpy(ifp->if_xname, NM_IFPNAME(pna->ifp));
601 mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
609 mna->role = NR_REG_PIPE_MASTER;
/* wire up the pipe-specific adapter callbacks */
612 mna->up.nm_txsync = netmap_pipe_txsync;
613 mna->up.nm_rxsync = netmap_pipe_rxsync;
614 mna->up.nm_register = netmap_pipe_reg;
615 mna->up.nm_dtor = netmap_pipe_dtor;
616 mna->up.nm_krings_create = netmap_pipe_krings_create;
617 mna->up.nm_krings_delete = netmap_pipe_krings_delete;
/* share the parent's memory allocator and buffer lookup table */
618 mna->up.nm_mem = pna->nm_mem;
619 mna->up.na_lut = pna->na_lut;
620 mna->up.na_lut_objtotal = pna->na_lut_objtotal;
/* pipes are single-queue; slot counts default to the parent's and are
 * clamped to [1, NM_PIPE_MAXSLOTS] */
622 mna->up.num_tx_rings = 1;
623 mna->up.num_rx_rings = 1;
624 mna->up.num_tx_desc = nmr->nr_tx_slots;
625 nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
626 1, NM_PIPE_MAXSLOTS, NULL);
627 mna->up.num_rx_desc = nmr->nr_rx_slots;
628 nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
629 1, NM_PIPE_MAXSLOTS, NULL);
630 error = netmap_attach_common(&mna->up);
633 /* register the master with the parent */
634 error = netmap_pipe_add(pna, mna);
638 /* create the slave */
/* NOTE(review): sizeof(*ifp) / sizeof(*mna) are reused for ifp2 / sna;
 * the types match so this is correct, but sizeof(*ifp2) / sizeof(*sna)
 * would be clearer */
639 ifp2 = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
644 strcpy(ifp2->if_xname, NM_IFPNAME(pna->ifp));
646 sna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
651 /* most fields are the same, copy from master and then fix */
654 sna->role = NR_REG_PIPE_SLAVE;
655 error = netmap_attach_common(&sna->up);
659 /* join the two endpoints */
663 /* we already have a reference to the parent, but we
664 * need another one for the other endpoint we created
666 netmap_adapter_get(pna);
/* the requested endpoint holds a reference to its peer (state A in
 * the lifecycle comment above netmap_pipe_krings_create) */
668 if (role == NR_REG_PIPE_MASTER) {
671 netmap_adapter_get(&sna->up);
675 netmap_adapter_get(&mna->up);
677 ND("created master %p and slave %p", mna, sna);
680 ND("pipe %d %s at %p", pipe_id,
681 (req->role == NR_REG_PIPE_MASTER ? "master" : "slave"), req);
/* hand a reference on the chosen endpoint to the caller */
683 netmap_adapter_get(*na);
685 /* write the configuration back */
686 nmr->nr_tx_rings = req->up.num_tx_rings;
687 nmr->nr_rx_rings = req->up.num_rx_rings;
688 nmr->nr_tx_slots = req->up.num_tx_desc;
689 nmr->nr_rx_slots = req->up.num_rx_desc;
691 /* keep the reference to the parent.
692 * It will be released by the req destructor
/* error unwind (labels elided in this listing) */
700 free(ifp2, M_DEVBUF);
706 netmap_adapter_put(pna);
711 #endif /* WITH_PIPES */