2 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * This module implements the VALE switch for netmap
32 NMG_LOCK() serializes all modifications to switches and ports.
33 A switch cannot be deleted until all ports are gone.
35 For each switch, an SX lock (RWlock on linux) protects
36 deletion of ports. When configuring or deleting a new port, the
37 lock is acquired in exclusive mode (after holding NMG_LOCK).
38 When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
39 The lock is held throughout the entire forwarding cycle,
40 during which the thread may incur in a page fault.
41 Hence it is important that sleepable shared locks are used.
43 On the rx ring, the per-port lock is grabbed initially to reserve
44 a number of slot in the ring, then the lock is released,
45 packets are copied from source to destination, and then
46 the lock is acquired again and the receive ring is updated.
47 (A similar thing is done on the tx ring for NIC and host stack
48 ports attached to the switch)
53 * OS-specific code that is used only within this file.
54 * Other OS-specific code that must be accessed by drivers
55 * is present in netmap_kern.h
58 #if defined(__FreeBSD__)
59 #include <sys/cdefs.h> /* prerequisite */
60 __FBSDID("$FreeBSD$");
62 #include <sys/types.h>
63 #include <sys/errno.h>
64 #include <sys/param.h> /* defines used in kernel.h */
65 #include <sys/kernel.h> /* types used in module initialization */
66 #include <sys/conf.h> /* cdevsw struct, UID, GID */
67 #include <sys/sockio.h>
68 #include <sys/socketvar.h> /* struct socket */
69 #include <sys/malloc.h>
71 #include <sys/rwlock.h>
72 #include <sys/socket.h> /* sockaddrs */
73 #include <sys/selinfo.h>
74 #include <sys/sysctl.h>
76 #include <net/if_var.h>
77 #include <net/bpf.h> /* BIOCIMMEDIATE */
78 #include <machine/bus.h> /* bus_dmamap_* */
79 #include <sys/endian.h>
80 #include <sys/refcount.h>
/* FreeBSD mapping of the per-bridge read/write lock primitives.
 * Writers (port attach/detach) take the lock exclusively; the
 * forwarding path takes it shared (or trylock for NIC contexts). */
83 #define BDG_RWLOCK_T struct rwlock // struct rwlock
85 #define BDG_RWINIT(b) \
86 rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
87 #define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock)
88 #define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock)
89 #define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock)
90 #define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock)
91 #define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock)
92 #define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock)
99 #elif defined(__APPLE__)
101 #warning OSX support is only partial
102 #include "osx_glue.h"
106 #error Unsupported platform
108 #endif /* unsupported */
114 #include <net/netmap.h>
115 #include <dev/netmap/netmap_kern.h>
116 #include <dev/netmap/netmap_mem2.h>
121 * system parameters (most of them in netmap_kern.h)
122 * NM_NAME prefix for switch port names, default "vale"
123 * NM_BDG_MAXPORTS number of ports
124 * NM_BRIDGES max number of switches in the system.
125 * XXX should become a sysctl or tunable
127 * Switch ports are named valeX:Y where X is the switch name and Y
128 * is the port. If Y matches a physical interface name, the port is
129 * connected to a physical device.
131 * Unlike physical interfaces, switch ports use their own memory region
132 * for rings and buffers.
133 * The virtual interfaces use per-queue lock instead of core lock.
134 * In the tx loop, we aggregate traffic in batches to make all operations
135 * faster. The batch size is bridge_batch.
/* Compile-time sizing of the VALE switch: ports, rings, hash table and
 * the per-ring forwarding ("ft") batch buffer. NM_BDG_BATCH_MAX adds
 * headroom so a multi-fragment packet can complete past the batch limit. */
137 #define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */
138 #define NM_BDG_MAXSLOTS 4096 /* XXX same as above */
139 #define NM_BRIDGE_RINGSIZE 1024 /* in the device */
140 #define NM_BDG_HASH 1024 /* forwarding table entries */
141 #define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */
142 #define NM_MULTISEG 64 /* max size of a chain of bufs */
143 /* actual size of the tables */
144 #define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG)
145 /* NM_FT_NULL terminates a list of slots in the ft */
146 #define NM_FT_NULL NM_BDG_BATCH_MAX
147 #define NM_BRIDGES 8 /* number of bridges */
151 * bridge_batch is set via sysctl to the max batch size to be
152 * used in the bridge. The actual value may be larger as the
153 * last packet in the block may overflow the size.
/* Run-time tunable (sysctl dev.netmap.bridge_batch, read-write):
 * maximum number of ft entries accumulated before forcing a flush
 * in nm_bdg_preflush(). */
155 int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
156 SYSCTL_DECL(_dev_netmap);
157 SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
160 static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **);
161 static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
162 static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
165 * For each output interface, nm_bdg_q is used to construct a list.
166 * bq_len is the number of output buffers (we can have coalescing
172 uint32_t bq_len; /* number of buffers */
175 /* XXX revise this */
177 uint64_t mac; /* the top 2 bytes are the epoch */
182 * nm_bridge is a descriptor for a VALE switch.
183 * Interfaces for a bridge are all in bdg_ports[].
184 * The array has fixed size, an empty entry does not terminate
185 * the search, but lookups only occur on attach/detach so we
186 * don't mind if they are slow.
188 * The bridge is non blocking on the transmit ports: excess
189 * packets are dropped if there is no room on the output port.
191 * bdg_lock protects accesses to the bdg_ports array.
192 * This is a rw lock (or equivalent).
195 /* XXX what is the proper alignment/layout ? */
196 BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */
198 uint32_t bdg_active_ports; /* 0 means free */
199 char bdg_basename[IFNAMSIZ];
201 /* Indexes of active ports (up to active_ports)
202 * and all other remaining ports.
204 uint8_t bdg_port_index[NM_BDG_MAXPORTS];
206 struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
210 * The function to decide the destination port.
211 * It returns either of an index of the destination port,
212 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
213 * forward this packet. ring_nr is the source ring index, and the
214 * function may overwrite this value to forward this packet to a
215 * different ring index.
216 * This function must be set by netmap_bdgctl().
218 struct netmap_bdg_ops bdg_ops;
220 /* the forwarding table, MAC+ports.
221 * XXX should be changed to an argument to be passed to
222 * the lookup function, and allocated on attach
224 struct nm_hash_ent ht[NM_BDG_HASH];
/* Return the name of the VALE switch (bdg_basename) that the given
 * virtual port is attached to. */
228 netmap_bdg_name(struct netmap_vp_adapter *vp)
230 struct nm_bridge *b = vp->na_bdg;
233 return b->bdg_basename;
238 * XXX in principle nm_bridges could be created dynamically
239 * Right now we have a static array and deletions are protected
240 * by an exclusive lock.
/* Static table of all VALE switches in the system; a slot with
 * bdg_active_ports == 0 is considered free (see nm_find_bridge()). */
242 struct nm_bridge nm_bridges[NM_BRIDGES];
246 * this is a slightly optimized copy routine which rounds
247 * to multiple of 64 bytes and is often faster than dealing
248 * with other odd sizes. We assume there is enough room
249 * in the source and destination buffers.
251 * XXX only for multiples of 64 bytes, non overlapped.
/* Copy l bytes from _src to _dst, 64 bytes at a time through uint64_t
 * loads/stores. Only valid for non-overlapping buffers with room for
 * the round-up to a multiple of 64. Large copies (>= 1024 bytes) take
 * a separate path — presumably a plain bcopy(); the branch body is not
 * visible in this extract (TODO confirm). */
254 pkt_copy(void *_src, void *_dst, int l)
256 uint64_t *src = _src;
257 uint64_t *dst = _dst;
258 if (unlikely(l >= 1024)) {
262 for (; likely(l > 0); l-=64) {
276 * locate a bridge among the existing ones.
277 * MUST BE CALLED WITH NMG_LOCK()
279 * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
280 * We assume that this is called with a name of at least NM_NAME chars.
/* Look up a VALE switch by name in the static nm_bridges[] table,
 * optionally creating it in the first free slot when 'create' is set.
 * MUST be called with NMG_LOCK held. The switch name is the prefix of
 * 'name' up to (and not including) a ':', or just the NM_NAME prefix.
 * Several error-return lines are missing from this extract; the return
 * value is presumably the bridge pointer or NULL (TODO confirm). */
282 static struct nm_bridge *
283 nm_find_bridge(const char *name, int create)
286 struct nm_bridge *b = NULL;
290 namelen = strlen(NM_NAME); /* base length */
291 l = name ? strlen(name) : 0; /* actual length */
293 D("invalid bridge name %s", name ? name : NULL);
/* extend namelen up to the ':' separator, if any */
296 for (i = namelen + 1; i < l; i++) {
297 if (name[i] == ':') {
302 if (namelen >= IFNAMSIZ)
304 ND("--- prefix is '%.*s' ---", namelen, name);
306 /* lookup the name, remember empty slot if there is one */
307 for (i = 0; i < NM_BRIDGES; i++) {
308 struct nm_bridge *x = nm_bridges + i;
310 if (x->bdg_active_ports == 0) {
311 if (create && b == NULL)
312 b = x; /* record empty slot */
313 } else if (x->bdg_namelen != namelen) {
315 } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
316 ND("found '%.*s' at %d", namelen, name, i);
321 if (i == NM_BRIDGES && b) { /* name not found, can create entry */
322 /* initialize the bridge */
/* NOTE(review): strncpy here relies on namelen < IFNAMSIZ (checked
 * above) to keep bdg_basename NUL-terminated — verify the slot was
 * zeroed before reuse. */
323 strncpy(b->bdg_basename, name, namelen);
324 ND("create new bridge %s with ports %d", b->bdg_basename,
325 b->bdg_active_ports);
326 b->bdg_namelen = namelen;
327 b->bdg_active_ports = 0;
328 for (i = 0; i < NM_BDG_MAXPORTS; i++)
329 b->bdg_port_index[i] = i;
330 /* set the default function */
331 b->bdg_ops.lookup = netmap_bdg_learning;
332 /* reset the MAC address table */
333 bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
340 * Free the forwarding tables for rings attached to switch ports.
/* Frees the nkr_ft scratch area of every tx kring of the adapter and
 * NULLs the pointer so a repeated call is harmless. */
343 nm_free_bdgfwd(struct netmap_adapter *na)
346 struct netmap_kring *kring;
349 nrings = na->num_tx_rings;
350 kring = na->tx_rings;
351 for (i = 0; i < nrings; i++) {
352 if (kring[i].nkr_ft) {
353 free(kring[i].nkr_ft, M_DEVBUF);
354 kring[i].nkr_ft = NULL; /* protect from freeing twice */
361 * Allocate the forwarding tables for the rings attached to the bridge ports.
/* One contiguous M_NOWAIT|M_ZERO allocation per tx ring, laid out as:
 *   NM_BDG_BATCH_MAX  x struct nm_bdg_fwd   (the ft work area)
 *   num_dstq          x struct nm_bdg_q     (per-destination queues)
 *   NM_BDG_BATCH_MAX  x uint16_t            (destination index list)
 * The malloc-failure path is not visible in this extract; presumably
 * it unwinds via nm_free_bdgfwd() and returns ENOMEM (TODO confirm). */
364 nm_alloc_bdgfwd(struct netmap_adapter *na)
366 int nrings, l, i, num_dstq;
367 struct netmap_kring *kring;
370 /* all port:rings + broadcast */
371 num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
372 l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
373 l += sizeof(struct nm_bdg_q) * num_dstq;
374 l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
376 nrings = netmap_real_tx_rings(na);
377 kring = na->tx_rings;
378 for (i = 0; i < nrings; i++) {
379 struct nm_bdg_fwd *ft;
380 struct nm_bdg_q *dstq;
383 ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
388 dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
/* every destination queue starts empty */
389 for (j = 0; j < num_dstq; j++) {
390 dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
393 kring[i].nkr_ft = ft;
399 /* remove from bridge b the ports in slots hw and sw
400 * (sw can be -1 if not needed)
/* Detach one or two ports (hardware slot 'hw' and optional host-stack
 * slot 'sw') from bridge b. The active-port index array is edited on a
 * private copy and copied back — presumably under BDG_WLOCK(), per the
 * comment below; the lock calls themselves are not visible here. */
403 netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
405 int s_hw = hw, s_sw = sw;
406 int i, lim =b->bdg_active_ports;
407 uint8_t tmp[NM_BDG_MAXPORTS];
/* Algorithm (original free-text pseudocode):
411 make a copy of bdg_port_index;
412 lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
413 in the array of bdg_port_index, replacing them with
414 entries from the bottom of the array;
415 decrement bdg_active_ports;
416 acquire BDG_WLOCK() and copy back the array.
 */
420 D("detach %d and %d (lim %d)", hw, sw, lim);
421 /* make a copy of the list of active ports, update it,
422 * and then copy back within BDG_WLOCK().
424 memcpy(tmp, b->bdg_port_index, sizeof(tmp));
425 for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
426 if (hw >= 0 && tmp[i] == hw) {
427 ND("detach hw %d at %d", hw, i);
428 lim--; /* point to last active port */
429 tmp[i] = tmp[lim]; /* swap with i */
430 tmp[lim] = hw; /* now this is inactive */
432 } else if (sw >= 0 && tmp[i] == sw) {
433 ND("detach sw %d at %d", sw, i);
/* hw/sw are set to -1 once found; a leftover >= 0 means the port
 * was never in the active list — treated as a serious bug below */
442 if (hw >= 0 || sw >= 0) {
443 D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
/* run the bridge-specific destructor, then clear the port slots */
448 b->bdg_ops.dtor(b->bdg_ports[s_hw]);
449 b->bdg_ports[s_hw] = NULL;
451 b->bdg_ports[s_sw] = NULL;
453 memcpy(b->bdg_port_index, tmp, sizeof(tmp));
454 b->bdg_active_ports = lim;
457 ND("now %d active ports", lim);
/* when the last port goes away the bridge slot becomes free again */
459 ND("marking bridge %s as free", b->bdg_basename);
460 bzero(&b->bdg_ops, sizeof(b->bdg_ops));
464 /* nm_bdg_ctl callback for VALE ports */
/* Attach is a no-op for a plain VALE port; on detach, disable the
 * rings, pull the port out of the bridge, re-enable the rings and
 * drop the reference taken at attach time. */
466 netmap_vp_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
468 struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
469 struct nm_bridge *b = vpna->na_bdg;
472 return 0; /* nothing to do */
474 netmap_set_all_rings(na, 0 /* disable */);
475 netmap_bdg_detach_common(b, vpna->bdg_port, -1);
477 netmap_set_all_rings(na, 1 /* enable */);
479 /* drop the reference that was taken just for the attach */
480 netmap_adapter_put(na);
484 /* nm_dtor callback for ephemeral VALE ports */
/* Runs when the last reference to an ephemeral port goes away:
 * detach the port from its bridge (if attached). */
486 netmap_vp_dtor(struct netmap_adapter *na)
488 struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
489 struct nm_bridge *b = vpna->na_bdg;
491 ND("%s has %d references", na->name, na->na_refcount);
494 netmap_bdg_detach_common(b, vpna->bdg_port, -1);
498 /* nm_dtor callback for persistent VALE ports */
/* NOTE(review): the visible body only grabs na->ifp; the rest
 * (releasing the ifnet / persistent state) is missing from this
 * extract — confirm against the full source. */
500 netmap_persist_vp_dtor(struct netmap_adapter *na)
502 struct ifnet *ifp = na->ifp;
509 /* remove a persistent VALE port from the system */
/* Looks up the ifnet by name and tears it down, after verifying it
 * really is a netmap VALE port (nm_register == netmap_vp_reg) and is
 * not busy (refcount > 1 means someone else holds it — presumably
 * returns EBUSY; the error returns are not visible here). */
511 nm_vi_destroy(const char *name)
516 ifp = ifunit_ref(name);
520 /* make sure this is actually a VALE port */
521 if (!NETMAP_CAPABLE(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
526 if (NA(ifp)->na_refcount > 1) {
532 D("destroying a persistent vale interface %s", ifp->if_xname);
533 /* Linux requires all the references are released
547 * Create a virtual interface registered to the system.
548 * The interface will be attached to a bridge later.
/* NETMAP_BDG_NEWIF handler: create a persistent VALE port. The name
 * must NOT carry the VALE prefix and must not clash with an existing
 * interface. nm_vi_persist() creates the ifnet, netmap_vp_create()
 * the adapter; error-unwind lines are missing from this extract. */
551 nm_vi_create(struct nmreq *nmr)
554 struct netmap_vp_adapter *vpna;
557 /* don't include VALE prefix */
558 if (!strncmp(nmr->nr_name, NM_NAME, strlen(NM_NAME)))
560 ifp = ifunit_ref(nmr->nr_name);
561 if (ifp) { /* already exist, cannot create new one */
565 error = nm_vi_persist(nmr->nr_name, &ifp);
570 /* netmap_vp_create creates a struct netmap_vp_adapter */
571 error = netmap_vp_create(nmr, ifp, &vpna);
573 D("error %d", error);
577 /* persist-specific routines */
578 vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
579 vpna->up.nm_dtor = netmap_persist_vp_dtor;
/* hold a reference so the port survives until BDG_DELIF */
580 netmap_adapter_get(&vpna->up);
582 D("created %s", ifp->if_xname);
586 /* Try to get a reference to a netmap adapter attached to a VALE switch.
587 * If the adapter is found (or is created), this function returns 0, a
588 * non NULL pointer is returned into *na, and the caller holds a
589 * reference to the adapter.
590 * If an adapter is not found, then no reference is grabbed and the
591 * function returns an error code, or 0 if there is just a VALE prefix
592 * mismatch. Therefore the caller holds a reference when
593 * (*na != NULL && return == 0).
/* MUST be called with NMG_LOCK held (it calls nm_find_bridge()).
 * Several branch/return lines are missing from this extract; the
 * overall flow is: prefix check -> bridge lookup -> port lookup ->
 * optionally create an ephemeral port or wrap an existing NIC. */
596 netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
598 char *nr_name = nmr->nr_name;
602 struct netmap_vp_adapter *vpna, *hostna = NULL;
604 int i, j, cand = -1, cand2 = -1;
607 *na = NULL; /* default return value */
609 /* first try to see if this is a bridge port. */
611 if (strncmp(nr_name, NM_NAME, sizeof(NM_NAME) - 1)) {
612 return 0; /* no error, but no VALE prefix */
615 b = nm_find_bridge(nr_name, create);
617 D("no bridges available for '%s'", nr_name);
618 return (create ? ENOMEM : ENXIO);
620 if (strlen(nr_name) < b->bdg_namelen) /* impossible */
623 /* Now we are sure that name starts with the bridge's name,
624 * lookup the port in the bridge. We need to scan the entire
625 * list. It is not important to hold a WLOCK on the bridge
626 * during the search because NMG_LOCK already guarantees
627 * that there are no other possible writers.
630 /* lookup in the local list of ports */
631 for (j = 0; j < b->bdg_active_ports; j++) {
632 i = b->bdg_port_index[j];
633 vpna = b->bdg_ports[i];
634 // KASSERT(na != NULL);
635 D("checking %s", vpna->up.name);
636 if (!strcmp(vpna->up.name, nr_name)) {
637 netmap_adapter_get(&vpna->up);
638 ND("found existing if %s refs %d", nr_name)
643 /* not found, should we create it? */
646 /* yes we should, see if we have space to attach entries */
647 needed = 2; /* in some cases we only need 1 */
648 if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
649 D("bridge full %d, cannot create new port", b->bdg_active_ports);
652 /* record the next two ports available, but do not allocate yet */
653 cand = b->bdg_port_index[b->bdg_active_ports];
654 cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
655 ND("+++ bridge %s port %s used %d avail %d %d",
656 b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
659 * try see if there is a matching NIC with this name
660 * (after the bridge's name)
662 ifname = nr_name + b->bdg_namelen + 1;
663 ifp = ifunit_ref(ifname);
665 /* Create an ephemeral virtual port
666 * This block contains all the ephemeral-specific logics
669 /* nr_cmd must be 0 for a virtual port */
673 /* bdg_netmap_attach creates a struct netmap_adapter */
674 error = netmap_vp_create(nmr, NULL, &vpna);
676 D("error %d", error);
680 /* shortcut - we can skip get_hw_na(),
681 * ownership check and nm_bdg_attach()
/* --- NIC path: wrap an existing hardware adapter (bwrap) --- */
684 struct netmap_adapter *hw;
686 error = netmap_get_hw_na(ifp, &hw);
687 if (error || hw == NULL)
690 /* host adapter might not be created */
691 error = hw->nm_bdg_attach(nr_name, hw);
695 hostna = hw->na_hostvp;
697 if (nmr->nr_arg1 != NETMAP_BDG_HOST)
702 vpna->bdg_port = cand;
703 ND("NIC %p to bridge port %d", vpna, cand);
704 /* bind the port to the bridge (virtual ports are not active) */
705 b->bdg_ports[cand] = vpna;
707 b->bdg_active_ports++;
708 if (hostna != NULL) {
709 /* also bind the host stack to the bridge */
710 b->bdg_ports[cand2] = hostna;
711 hostna->bdg_port = cand2;
713 b->bdg_active_ports++;
714 ND("host %p to bridge port %d", hostna, cand2);
716 ND("if %s refs %d", ifname, vpna->up.na_refcount);
/* success: return the port to the caller with a held reference */
719 netmap_adapter_get(*na);
729 /* Process NETMAP_BDG_ATTACH */
/* Resolve (or create) the named port, refuse if somebody already owns
 * it, then invoke the port's own nm_bdg_ctl to complete the attach
 * (a nop for VALE ports, ring setup for NIC bwraps). On any failure
 * after the lookup the reference is dropped below. */
731 nm_bdg_ctl_attach(struct nmreq *nmr)
733 struct netmap_adapter *na;
738 error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
739 if (error) /* no device */
742 if (na == NULL) { /* VALE prefix missing */
747 if (NETMAP_OWNED_BY_ANY(na)) {
752 if (na->nm_bdg_ctl) {
753 /* nop for VALE ports. The bwrap needs to put the hwna
754 * in netmap mode (see netmap_bwrap_bdg_ctl)
756 error = na->nm_bdg_ctl(na, nmr, 1);
759 ND("registered %s to netmap-mode", na->name);
765 netmap_adapter_put(na);
772 /* process NETMAP_BDG_DETACH */
/* Look up the named port (never creating it) and delegate the detach
 * to its nm_bdg_ctl callback; always drop the lookup reference. */
774 nm_bdg_ctl_detach(struct nmreq *nmr)
776 struct netmap_adapter *na;
780 error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
781 if (error) { /* no device, or another bridge or user owns the device */
785 if (na == NULL) { /* VALE prefix missing */
790 if (na->nm_bdg_ctl) {
791 /* remove the port from bridge. The bwrap
792 * also needs to put the hwna in normal mode
794 error = na->nm_bdg_ctl(na, nmr, 0);
797 netmap_adapter_put(na);
805 /* Called by either user's context (netmap_ioctl())
806 * or external kernel modules (e.g., Openvswitch).
807 * Operation is indicated in nmr->nr_cmd.
808 * NETMAP_BDG_OPS that sets configure/lookup/dtor functions to the bridge
809 * requires bdg_ops argument; the other commands ignore this argument.
811 * Called without NMG_LOCK.
/* Central dispatcher for all bridge-control commands. The NMG_LOCK /
 * NMG_UNLOCK pairs around each case are not visible in this extract;
 * presumably each case takes the lock as needed (TODO confirm). */
814 netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
817 struct netmap_adapter *na;
818 struct netmap_vp_adapter *vpna;
819 char *name = nmr->nr_name;
820 int cmd = nmr->nr_cmd, namelen = strlen(name);
824 case NETMAP_BDG_NEWIF:
825 error = nm_vi_create(nmr);
828 case NETMAP_BDG_DELIF:
829 error = nm_vi_destroy(nmr->nr_name);
832 case NETMAP_BDG_ATTACH:
833 error = nm_bdg_ctl_attach(nmr);
836 case NETMAP_BDG_DETACH:
837 error = nm_bdg_ctl_detach(nmr);
840 case NETMAP_BDG_LIST:
841 /* this is used to enumerate bridges and ports */
842 if (namelen) { /* look up indexes of bridge and port */
843 if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
848 b = nm_find_bridge(name, 0 /* don't create */);
/* skip past "valeX:" to get the bare port name */
855 name = name + b->bdg_namelen + 1;
857 for (j = 0; j < b->bdg_active_ports; j++) {
858 i = b->bdg_port_index[j];
859 vpna = b->bdg_ports[i];
861 D("---AAAAAAAAARGH-------");
864 /* the former and the latter identify a
865 * virtual port and a NIC, respectively
867 if (!strcmp(vpna->up.name, name)) {
/* report bridge index and port index back to the caller */
869 nmr->nr_arg1 = b - nm_bridges;
870 nmr->nr_arg2 = i; /* port index */
877 /* return the first non-empty entry starting from
878 * bridge nr_arg1 and port nr_arg2.
880 * Users can detect the end of the same bridge by
881 * seeing the new and old value of nr_arg1, and can
882 * detect the end of all the bridge by error != 0
888 for (error = ENOENT; i < NM_BRIDGES; i++) {
890 if (j >= b->bdg_active_ports) {
891 j = 0; /* following bridges scan from 0 */
896 j = b->bdg_port_index[j];
897 vpna = b->bdg_ports[j];
/* NOTE(review): strncpy may leave 'name' unterminated when the
 * source is exactly IFNAMSIZ long — verify against full source */
898 strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
906 case NETMAP_BDG_REGOPS: /* XXX this should not be available from userspace */
907 /* register callbacks to the given bridge.
908 * nmr->nr_name may be just bridge's name (including ':'
909 * if it is not just NM_NAME).
916 b = nm_find_bridge(name, 0 /* don't create */);
920 b->bdg_ops = *bdg_ops;
925 case NETMAP_BDG_VNET_HDR:
926 /* Valid lengths for the virtio-net header are 0 (no header),
928 if (nmr->nr_arg1 != 0 &&
929 nmr->nr_arg1 != sizeof(struct nm_vnet_hdr) &&
930 nmr->nr_arg1 != 12) {
935 error = netmap_get_bdg_na(nmr, &na, 0);
937 vpna = (struct netmap_vp_adapter *)na;
938 vpna->virt_hdr_len = nmr->nr_arg1;
/* with a vnet header, force the max frame size to a full buffer */
939 if (vpna->virt_hdr_len)
940 vpna->mfs = NETMAP_BUF_SIZE(na);
941 D("Using vnet_hdr_len %d for %p", vpna->virt_hdr_len, vpna);
942 netmap_adapter_put(na);
948 D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
/* Forward a configuration request to the bridge's registered config
 * callback (set via NETMAP_BDG_REGOPS), if any. */
956 netmap_bdg_config(struct nmreq *nmr)
962 b = nm_find_bridge(nmr->nr_name, 0);
968 /* Don't call config() with NMG_LOCK() held */
970 if (b->bdg_ops.config != NULL)
971 error = b->bdg_ops.config((struct nm_ifreq *)nmr);
977 /* nm_krings_create callback for VALE ports.
978 * Calls the standard netmap_krings_create, then adds leases on rx
979 * rings and bdgfwd on tx rings.
982 netmap_vp_krings_create(struct netmap_adapter *na)
987 u_int nrx = netmap_real_rx_rings(na);
990 * Leases are attached to RX rings on vale ports
/* one uint32_t lease slot per rx descriptor, carved out of the
 * kring tailroom allocated by netmap_krings_create() */
992 tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
994 error = netmap_krings_create(na, tailroom);
998 leases = na->tailroom;
1000 for (i = 0; i < nrx; i++) { /* Receive rings */
1001 na->rx_rings[i].nkr_leases = leases;
1002 leases += na->num_rx_desc;
/* on bdgfwd allocation failure, undo the kring creation */
1005 error = nm_alloc_bdgfwd(na);
1007 netmap_krings_delete(na);
1015 /* nm_krings_delete callback for VALE ports. */
/* NOTE(review): presumably also frees the bdgfwd tables
 * (nm_free_bdgfwd) — that call is not visible in this extract. */
1017 netmap_vp_krings_delete(struct netmap_adapter *na)
1020 netmap_krings_delete(na);
/* forward declaration: batch flush routine used by nm_bdg_preflush() */
1025 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
1026 struct netmap_vp_adapter *na, u_int ring_nr);
1030 * main dispatch routine for the bridge.
1031 * Grab packets from a kring, move them into the ft structure
1032 * associated to the tx (input) port. Max one instance per port,
1033 * filtered on input (ioctl, poll or XXX).
1034 * Returns the next position in the ring.
/* Collect slots [nr_hwcur, end) from the source tx kring into the
 * per-ring ft[] work area, tracking multi-fragment packets
 * (NS_MOREFRAG), and flush to destinations via nm_bdg_flush() every
 * bridge_batch entries and once at the end. The ft assignment and
 * the BDG_RUNLOCK are on lines missing from this extract. */
1037 nm_bdg_preflush(struct netmap_kring *kring, u_int end)
1039 struct netmap_vp_adapter *na =
1040 (struct netmap_vp_adapter*)kring->na;
1041 struct netmap_ring *ring = kring->ring;
1042 struct nm_bdg_fwd *ft;
1043 u_int ring_nr = kring->ring_id;
1044 u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
1045 u_int ft_i = 0; /* start from 0 */
1046 u_int frags = 1; /* how many frags ? */
1047 struct nm_bridge *b = na->na_bdg;
1049 /* To protect against modifications to the bridge we acquire a
1050 * shared lock, waiting if we can sleep (if the source port is
1051 * attached to a user process) or with a trylock otherwise (NICs).
1053 ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1054 if (na->up.na_flags & NAF_BDG_MAYSLEEP)
1056 else if (!BDG_RTRYLOCK(b))
1058 ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
1061 for (; likely(j != end); j = nm_next(j, lim)) {
1062 struct netmap_slot *slot = &ring->slot[j];
1065 ft[ft_i].ft_len = slot->len;
1066 ft[ft_i].ft_flags = slot->flags;
1068 ND("flags is 0x%x", slot->flags);
1069 /* this slot goes into a list so initialize the link field */
1070 ft[ft_i].ft_next = NM_FT_NULL;
/* NS_INDIRECT slots carry a user pointer instead of a netmap buffer */
1071 buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
1072 (void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
1073 if (unlikely(buf == NULL)) {
1074 RD(5, "NULL %s buffer pointer from %s slot %d len %d",
1075 (slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
1076 kring->name, j, ft[ft_i].ft_len);
/* substitute a valid zero-length buffer so the batch stays sane */
1077 buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
1078 ft[ft_i].ft_len = 0;
1079 ft[ft_i].ft_flags = 0;
1081 __builtin_prefetch(buf);
1083 if (slot->flags & NS_MOREFRAG) {
1087 if (unlikely(netmap_verbose && frags > 1))
1088 RD(5, "%d frags at %d", frags, ft_i - frags);
/* record the fragment count on the first slot of the packet */
1089 ft[ft_i - frags].ft_frags = frags;
1091 if (unlikely((int)ft_i >= bridge_batch))
1092 ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
/* ring ended mid-packet: drop the trailing incomplete fragments */
1095 D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1096 // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1097 ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
1098 ft[ft_i - frags].ft_frags = frags - 1;
1101 ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1107 /* ----- FreeBSD if_bridge hash function ------- */
1110 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1111 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1113 * http://www.burtleburtle.net/bob/hash/spooky.html
/* Jenkins 3-word mixing step; each argument is read and written,
 * so only pass plain lvalues (no side effects). */
1115 #define mix(a, b, c) \
1117 a -= b; a -= c; a ^= (c >> 13); \
1118 b -= c; b -= a; b ^= (a << 8); \
1119 c -= a; c -= b; c ^= (b >> 13); \
1120 a -= b; a -= c; a ^= (c >> 12); \
1121 b -= c; b -= a; b ^= (a << 16); \
1122 c -= a; c -= b; c ^= (b >> 5); \
1123 a -= b; a -= c; a ^= (c >> 3); \
1124 b -= c; b -= a; b ^= (a << 10); \
1125 c -= a; c -= b; c ^= (b >> 15); \
1126 } while (/*CONSTCOND*/0)
/* Hash a 6-byte MAC address into the forwarding table index space
 * (0 .. NM_BDG_HASH-1) using the Jenkins mix above; the byte-loading
 * and mix() invocation lines are missing from this extract. */
1129 static __inline uint32_t
1130 nm_bridge_rthash(const uint8_t *addr)
1132 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key (golden ratio)
1142 #define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1)
1143 return (c & BRIDGE_RTHASH_MASK);
1149 /* nm_register callback for VALE ports */
/* Toggle NAF_NETMAP_ON for the port; when the port is attached to a
 * bridge the flag flip is done under the bridge write lock so the
 * forwarding path never sees a half-enabled port. */
1151 netmap_vp_reg(struct netmap_adapter *na, int onoff)
1153 struct netmap_vp_adapter *vpna =
1154 (struct netmap_vp_adapter*)na;
1156 /* persistent ports may be put in netmap mode
1157 * before being attached to a bridge
1160 BDG_WLOCK(vpna->na_bdg);
1162 na->na_flags |= NAF_NETMAP_ON;
1163 /* XXX on FreeBSD, persistent VALE ports should also
1164 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
1167 na->na_flags &= ~NAF_NETMAP_ON;
1170 BDG_WUNLOCK(vpna->na_bdg);
1176 * Lookup function for a learning bridge.
1177 * Update the hash table with the source address,
1178 * and then returns the destination port index, and the
1179 * ring in *dst_ring (at the moment, always use ring 0)
/* Default bdg_ops.lookup: learn the source MAC into ht[], then return
 * the learned destination port for unicast frames or NM_BDG_BROADCAST
 * otherwise. Returns NM_BDG_NOPORT to drop malformed frames. */
1182 netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
1183 const struct netmap_vp_adapter *na)
1185 uint8_t *buf = ft->ft_buf;
1186 u_int buf_len = ft->ft_len;
1187 struct nm_hash_ent *ht = na->na_bdg->ht;
1189 u_int dst, mysrc = na->bdg_port;
1190 uint64_t smac, dmac;
1192 /* safety check, unfortunately we have many cases */
1193 if (buf_len >= 14 + na->virt_hdr_len) {
1194 /* virthdr + mac_hdr in the same slot */
1195 buf += na->virt_hdr_len;
1196 buf_len -= na->virt_hdr_len;
1197 } else if (buf_len == na->virt_hdr_len && ft->ft_flags & NS_MOREFRAG) {
1198 /* only header in first fragment */
1201 buf_len = ft->ft_len;
1203 RD(5, "invalid buf format, length %d", buf_len);
1204 return NM_BDG_NOPORT;
/* NOTE(review): dmac/smac are 48-bit MACs read as little-endian
 * 64-bit words; the smac >>= 16 adjustment after the buf+4 load is
 * on a line missing from this extract — confirm against full source */
1206 dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1207 smac = le64toh(*(uint64_t *)(buf + 4));
1211 * The hash is somewhat expensive, there might be some
1212 * worthwhile optimizations here.
/* bit 0 of the first source byte set would mean a multicast source,
 * which is invalid — only learn unicast sources */
1214 if ((buf[6] & 1) == 0) { /* valid src */
1216 sh = nm_bridge_rthash(s); // XXX hash of source
1217 /* update source port forwarding entry */
1218 ht[sh].mac = smac; /* XXX expire ? */
1219 ht[sh].ports = mysrc;
1221 D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1222 s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1224 dst = NM_BDG_BROADCAST;
1225 if ((buf[0] & 1) == 0) { /* unicast */
1226 dh = nm_bridge_rthash(buf); // XXX hash of dst
1227 if (ht[dh].mac == dmac) { /* found dst */
1230 /* XXX otherwise return NM_BDG_UNKNOWN ? */
1238 * Available space in the ring. Only used in VALE code
1239 * and only with is_rx = 1
/* Number of slots that can still be leased on the kring: for rx,
 * everything between nkr_hwlease and nr_hwcur (minus one slot to keep
 * head != tail). The sanity check at the bottom logs any out-of-range
 * index. Branch selection lines (is_rx test) are missing here. */
1241 static inline uint32_t
1242 nm_kr_space(struct netmap_kring *k, int is_rx)
1247 int busy = k->nkr_hwlease - k->nr_hwcur;
1249 busy += k->nkr_num_slots;
1250 space = k->nkr_num_slots - 1 - busy;
1252 /* XXX never used in this branch */
1253 space = k->nr_hwtail - k->nkr_hwlease;
1255 space += k->nkr_num_slots;
1259 if (k->nkr_hwlease >= k->nkr_num_slots ||
1260 k->nr_hwcur >= k->nkr_num_slots ||
1261 k->nr_tail >= k->nkr_num_slots ||
1263 busy >= k->nkr_num_slots) {
1264 D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1265 k->nkr_lease_idx, k->nkr_num_slots);
1274 /* make a lease on the kring for N positions. return the
1276 * XXX only used in VALE code and with is_rx = 1
/* Reserve n slots on the kring: record the lease start (marked
 * NR_NOSLOT until completed), advance nkr_hwlease by n with wraparound,
 * and sanity-check all indices. The caller copies packets into the
 * leased slots without holding the ring lock. */
1278 static inline uint32_t
1279 nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
1281 uint32_t lim = k->nkr_num_slots - 1;
1282 uint32_t lease_idx = k->nkr_lease_idx;
1284 k->nkr_leases[lease_idx] = NR_NOSLOT;
1285 k->nkr_lease_idx = nm_next(lease_idx, lim);
1287 if (n > nm_kr_space(k, is_rx)) {
1288 D("invalid request for %d slots", n);
1291 /* XXX verify that there are n slots */
1292 k->nkr_hwlease += n;
1293 if (k->nkr_hwlease > lim)
1294 k->nkr_hwlease -= lim + 1;
1296 if (k->nkr_hwlease >= k->nkr_num_slots ||
1297 k->nr_hwcur >= k->nkr_num_slots ||
1298 k->nr_hwtail >= k->nkr_num_slots ||
1299 k->nkr_lease_idx >= k->nkr_num_slots) {
1300 D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
1302 k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
1303 k->nkr_lease_idx, k->nkr_num_slots);
1310 * This flush routine supports only unicast and broadcast but a large
1311 * number of ports, and lets us replace the learn and dispatch functions.
1314 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1317 struct nm_bdg_q *dst_ents, *brddst;
1318 uint16_t num_dsts = 0, *dsts;
1319 struct nm_bridge *b = na->na_bdg;
1320 u_int i, j, me = na->bdg_port;
1323 * The work area (pointed by ft) is followed by an array of
1324 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1325 * queues per port plus one for the broadcast traffic.
1326 * Then we have an array of destination indexes.
1328 dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1329 dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1331 /* first pass: find a destination for each packet in the batch */
1332 for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1333 uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1334 uint16_t dst_port, d_i;
1337 ND("slot %d frags %d", i, ft[i].ft_frags);
1338 /* Drop the packet if the virtio-net header is not into the first
1339 fragment nor at the very beginning of the second. */
1340 if (unlikely(na->virt_hdr_len > ft[i].ft_len))
1342 dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
1343 if (netmap_verbose > 255)
1344 RD(5, "slot %d port %d -> %d", i, me, dst_port);
1345 if (dst_port == NM_BDG_NOPORT)
1346 continue; /* this packet is identified to be dropped */
1347 else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1349 else if (dst_port == NM_BDG_BROADCAST)
1350 dst_ring = 0; /* broadcasts always go to ring 0 */
1351 else if (unlikely(dst_port == me ||
1352 !b->bdg_ports[dst_port]))
1355 /* get a position in the scratch pad */
1356 d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1359 /* append the first fragment to the list */
1360 if (d->bq_head == NM_FT_NULL) { /* new destination */
1361 d->bq_head = d->bq_tail = i;
1362 /* remember this position to be scanned later */
1363 if (dst_port != NM_BDG_BROADCAST)
1364 dsts[num_dsts++] = d_i;
1366 ft[d->bq_tail].ft_next = i;
1369 d->bq_len += ft[i].ft_frags;
1373 * Broadcast traffic goes to ring 0 on all destinations.
1374 * So we need to add these rings to the list of ports to scan.
1375 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1376 * expensive. We should keep a compact list of active destinations
1377 * so we could shorten this loop.
1379 brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1380 if (brddst->bq_head != NM_FT_NULL) {
1381 for (j = 0; likely(j < b->bdg_active_ports); j++) {
1383 i = b->bdg_port_index[j];
1384 if (unlikely(i == me))
1386 d_i = i * NM_BDG_MAXRINGS;
1387 if (dst_ents[d_i].bq_head == NM_FT_NULL)
1388 dsts[num_dsts++] = d_i;
1392 ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1393 /* second pass: scan destinations */
1394 for (i = 0; i < num_dsts; i++) {
1395 struct netmap_vp_adapter *dst_na;
1396 struct netmap_kring *kring;
1397 struct netmap_ring *ring;
1398 u_int dst_nr, lim, j, d_i, next, brd_next;
1399 u_int needed, howmany;
1400 int retry = netmap_txsync_retry;
1402 uint32_t my_start = 0, lease_idx = 0;
1404 int virt_hdr_mismatch = 0;
1407 ND("second pass %d port %d", i, d_i);
1409 // XXX fix the division
1410 dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1411 /* protect from the lookup function returning an inactive
1414 if (unlikely(dst_na == NULL))
1416 if (dst_na->up.na_flags & NAF_SW_ONLY)
1419 * The interface may be in !netmap mode in two cases:
1420 * - when na is attached but not activated yet;
1421 * - when na is being deactivated but is still attached.
1423 if (unlikely(!nm_netmap_on(&dst_na->up))) {
1424 ND("not in netmap mode!");
1428 /* there is at least one either unicast or broadcast packet */
1429 brd_next = brddst->bq_head;
1431 /* we need to reserve this many slots. If fewer are
1432 * available, some packets will be dropped.
1433 * Packets may have multiple fragments, so we may not use
1434 * there is a chance that we may not use all of the slots
1435 * we have claimed, so we will need to handle the leftover
1436 * ones when we regain the lock.
1438 needed = d->bq_len + brddst->bq_len;
1440 if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
1441 RD(3, "virt_hdr_mismatch, src %d dst %d", na->virt_hdr_len, dst_na->virt_hdr_len);
1442 /* There is a virtio-net header/offloadings mismatch between
1443 * source and destination. The slower mismatch datapath will
1444 * be used to cope with all the mismatches.
1446 virt_hdr_mismatch = 1;
1447 if (dst_na->mfs < na->mfs) {
1448 /* We may need to do segmentation offloadings, and so
1449 * we may need a number of destination slots greater
1450 * than the number of input slots ('needed').
1451 * We look for the smallest integer 'x' which satisfies:
1452 * needed * na->mfs + x * H <= x * na->mfs
1453 * where 'H' is the length of the longest header that may
1454 * be replicated in the segmentation process (e.g. for
1455 * TCPv4 we must account for ethernet header, IP header
1456 * and TCPv4 header).
1458 needed = (needed * na->mfs) /
1459 (dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
1460 ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
1464 ND(5, "pass 2 dst %d is %x %s",
1465 i, d_i, is_vp ? "virtual" : "nic/host");
1466 dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1467 nrings = dst_na->up.num_rx_rings;
1468 if (dst_nr >= nrings)
1469 dst_nr = dst_nr % nrings;
1470 kring = &dst_na->up.rx_rings[dst_nr];
1472 lim = kring->nkr_num_slots - 1;
1476 if (dst_na->retry && retry) {
1477 /* try to get some free slot from the previous run */
1478 dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1479 /* actually useful only for bwraps, since there
1480 * the notify will trigger a txsync on the hwna. VALE ports
1481 * have dst_na->retry == 0
1484 /* reserve the buffers in the queue and an entry
1485 * to report completion, and drop lock.
1486 * XXX this might become a helper function.
1488 mtx_lock(&kring->q_lock);
1489 if (kring->nkr_stopped) {
1490 mtx_unlock(&kring->q_lock);
1493 my_start = j = kring->nkr_hwlease;
1494 howmany = nm_kr_space(kring, 1);
1495 if (needed < howmany)
1497 lease_idx = nm_kr_lease(kring, howmany, 1);
1498 mtx_unlock(&kring->q_lock);
1500 /* only retry if we need more than available slots */
1501 if (retry && needed <= howmany)
1504 /* copy to the destination queue */
1505 while (howmany > 0) {
1506 struct netmap_slot *slot;
1507 struct nm_bdg_fwd *ft_p, *ft_end;
1510 /* find the queue from which we pick next packet.
1511 * NM_FT_NULL is always higher than valid indexes
1512 * so we never dereference it if the other list
1513 * has packets (and if both are empty we never
1516 if (next < brd_next) {
1518 next = ft_p->ft_next;
1519 } else { /* insert broadcast */
1520 ft_p = ft + brd_next;
1521 brd_next = ft_p->ft_next;
1523 cnt = ft_p->ft_frags; // cnt > 0
1524 if (unlikely(cnt > howmany))
1525 break; /* no more space */
1526 if (netmap_verbose && cnt > 1)
1527 RD(5, "rx %d frags to %d", cnt, j);
1528 ft_end = ft_p + cnt;
1529 if (unlikely(virt_hdr_mismatch)) {
1530 bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
1534 char *dst, *src = ft_p->ft_buf;
1535 size_t copy_len = ft_p->ft_len, dst_len = copy_len;
1537 slot = &ring->slot[j];
1538 dst = NMB(&dst_na->up, slot);
1540 ND("send [%d] %d(%d) bytes at %s:%d",
1541 i, (int)copy_len, (int)dst_len,
1542 NM_IFPNAME(dst_ifp), j);
1543 /* round to a multiple of 64 */
1544 copy_len = (copy_len + 63) & ~63;
1546 if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
1547 copy_len > NETMAP_BUF_SIZE(&na->up))) {
1548 RD(5, "invalid len %d, down to 64", (int)copy_len);
1549 copy_len = dst_len = 64; // XXX
1551 if (ft_p->ft_flags & NS_INDIRECT) {
1552 if (copyin(src, dst, copy_len)) {
1553 // invalid user pointer, pretend len is 0
1557 //memcpy(dst, src, copy_len);
1558 pkt_copy(src, dst, (int)copy_len);
1560 slot->len = dst_len;
1561 slot->flags = (cnt << 8)| NS_MOREFRAG;
1562 j = nm_next(j, lim);
1565 } while (ft_p != ft_end);
1566 slot->flags = (cnt << 8); /* clear flag on last entry */
1569 if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1573 /* current position */
1574 uint32_t *p = kring->nkr_leases; /* shorthand */
1575 uint32_t update_pos;
1576 int still_locked = 1;
1578 mtx_lock(&kring->q_lock);
1579 if (unlikely(howmany > 0)) {
1580 /* not used all bufs. If i am the last one
1581 * i can recover the slots, otherwise must
1582 * fill them with 0 to mark empty packets.
1584 ND("leftover %d bufs", howmany);
1585 if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1586 /* yes i am the last one */
1587 ND("roll back nkr_hwlease to %d", j);
1588 kring->nkr_hwlease = j;
1590 while (howmany-- > 0) {
1591 ring->slot[j].len = 0;
1592 ring->slot[j].flags = 0;
1593 j = nm_next(j, lim);
1597 p[lease_idx] = j; /* report I am done */
1599 update_pos = kring->nr_hwtail;
1601 if (my_start == update_pos) {
1602 /* all slots before my_start have been reported,
1603 * so scan subsequent leases to see if other ranges
1604 * have been completed, and to a selwakeup or txsync.
1606 while (lease_idx != kring->nkr_lease_idx &&
1607 p[lease_idx] != NR_NOSLOT) {
1609 p[lease_idx] = NR_NOSLOT;
1610 lease_idx = nm_next(lease_idx, lim);
1612 /* j is the new 'write' position. j != my_start
1613 * means there are new buffers to report
1615 if (likely(j != my_start)) {
1616 kring->nr_hwtail = j;
1618 mtx_unlock(&kring->q_lock);
1619 dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1620 /* this is netmap_notify for VALE ports and
1621 * netmap_bwrap_notify for bwrap. The latter will
1622 * trigger a txsync on the underlying hwna
1624 if (dst_na->retry && retry--) {
1625 /* XXX this is going to call nm_notify again.
1626 * Only useful for bwrap in virtual machines
1633 mtx_unlock(&kring->q_lock);
1636 d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1639 brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1644 /* nm_txsync callback for VALE ports */
1646 netmap_vp_txsync(struct netmap_kring *kring, int flags)
1648 struct netmap_vp_adapter *na =
1649 (struct netmap_vp_adapter *)kring->na;
1651 u_int const lim = kring->nkr_num_slots - 1;
1652 u_int const cur = kring->rcur;
1654 if (bridge_batch <= 0) { /* testing only */
1655 done = cur; // used all
1662 if (bridge_batch > NM_BDG_BATCH)
1663 bridge_batch = NM_BDG_BATCH;
1665 done = nm_bdg_preflush(kring, cur);
1668 D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
1670 * packets between 'done' and 'cur' are left unsent.
1672 kring->nr_hwcur = done;
1673 kring->nr_hwtail = nm_prev(done, lim);
1674 nm_txsync_finalize(kring);
1676 D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
1681 /* rxsync code used by VALE ports nm_rxsync callback and also
1682 * internally by the brwap
1685 netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
1687 struct netmap_adapter *na = kring->na;
1688 struct netmap_ring *ring = kring->ring;
1689 u_int nm_i, lim = kring->nkr_num_slots - 1;
1690 u_int head = nm_rxsync_prologue(kring);
1694 D("ouch dangerous reset!!!");
1695 n = netmap_ring_reinit(kring);
1699 /* First part, import newly received packets. */
1700 /* actually nothing to do here, they are already in the kring */
1702 /* Second part, skip past packets that userspace has released. */
1703 nm_i = kring->nr_hwcur;
1705 /* consistency check, but nothing really important here */
1706 for (n = 0; likely(nm_i != head); n++) {
1707 struct netmap_slot *slot = &ring->slot[nm_i];
1708 void *addr = NMB(na, slot);
1710 if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
1711 D("bad buffer index %d, ignore ?",
1714 slot->flags &= ~NS_BUF_CHANGED;
1715 nm_i = nm_next(nm_i, lim);
1717 kring->nr_hwcur = head;
1720 /* tell userspace that there are new packets */
1721 nm_rxsync_finalize(kring);
1728 * nm_rxsync callback for VALE ports
1729 * user process reading from a VALE switch.
1730 * Already protected against concurrent calls from userspace,
1731 * but we must acquire the queue's lock to protect against
1732 * writers on the same queue.
1735 netmap_vp_rxsync(struct netmap_kring *kring, int flags)
1739 mtx_lock(&kring->q_lock);
1740 n = netmap_vp_rxsync_locked(kring, flags);
1741 mtx_unlock(&kring->q_lock);
1746 /* nm_bdg_attach callback for VALE ports
1747 * The na_vp port is this same netmap_adapter. There is no host port.
1750 netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
1752 struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
1757 strncpy(na->name, name, sizeof(na->name));
1758 na->na_hostvp = NULL;
1762 /* create a netmap_vp_adapter that describes a VALE port.
1763 * Only persistent VALE ports have a non-null ifp.
1766 netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret)
1768 struct netmap_vp_adapter *vpna;
1769 struct netmap_adapter *na;
1773 vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1780 strncpy(na->name, nmr->nr_name, sizeof(na->name));
1782 /* bound checking */
1783 na->num_tx_rings = nmr->nr_tx_rings;
1784 nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1785 nmr->nr_tx_rings = na->num_tx_rings; // write back
1786 na->num_rx_rings = nmr->nr_rx_rings;
1787 nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
1788 nmr->nr_rx_rings = na->num_rx_rings; // write back
1789 nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1790 1, NM_BDG_MAXSLOTS, NULL);
1791 na->num_tx_desc = nmr->nr_tx_slots;
1792 nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1793 1, NM_BDG_MAXSLOTS, NULL);
1794 /* validate number of pipes. We want at least 1,
1795 * but probably can do with some more.
1796 * So let's use 2 as default (when 0 is supplied)
1798 npipes = nmr->nr_arg1;
1799 nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
1800 nmr->nr_arg1 = npipes; /* write back */
1801 /* validate extra bufs */
1802 nm_bound_var(&nmr->nr_arg3, 0, 0,
1803 128*NM_BDG_MAXSLOTS, NULL);
1804 na->num_rx_desc = nmr->nr_rx_slots;
1805 vpna->virt_hdr_len = 0;
1807 /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero??
1808 vpna->mfs = netmap_buf_size; */
1810 D("max frame size %u", vpna->mfs);
1812 na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1813 na->nm_txsync = netmap_vp_txsync;
1814 na->nm_rxsync = netmap_vp_rxsync;
1815 na->nm_register = netmap_vp_reg;
1816 na->nm_krings_create = netmap_vp_krings_create;
1817 na->nm_krings_delete = netmap_vp_krings_delete;
1818 na->nm_dtor = netmap_vp_dtor;
1819 na->nm_mem = netmap_mem_private_new(na->name,
1820 na->num_tx_rings, na->num_tx_desc,
1821 na->num_rx_rings, na->num_rx_desc,
1822 nmr->nr_arg3, npipes, &error);
1823 if (na->nm_mem == NULL)
1825 na->nm_bdg_attach = netmap_vp_bdg_attach;
1826 /* other nmd fields are set in the common routine */
1827 error = netmap_attach_common(na);
1834 if (na->nm_mem != NULL)
1835 netmap_mem_private_delete(na->nm_mem);
1836 free(vpna, M_DEVBUF);
1840 /* Bridge wrapper code (bwrap).
1841 * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
1843 * The main task is to swap the meaning of tx and rx rings to match the
1844 * expectations of the VALE switch code (see nm_bdg_flush).
1846 * The bwrap works by interposing a netmap_bwrap_adapter between the
1847 * rest of the system and the hwna. The netmap_bwrap_adapter looks like
1848 * a netmap_vp_adapter to the rest of the system, but, internally, it
1849 * translates all callbacks to what the hwna expects.
1851 * Note that we have to intercept callbacks coming from two sides:
1853 * - callbacks coming from the netmap module are intercepted by
1854 * passing around the netmap_bwrap_adapter instead of the hwna
1856 * - callbacks coming from outside of the netmap module only know
1857 * about the hwna. This, however, only happens in interrupt
1858 * handlers, where only the hwna->nm_notify callback is called.
1859 * What the bwrap does is to overwrite the hwna->nm_notify callback
1860 * with its own netmap_bwrap_intr_notify.
1861 * XXX This assumes that the hwna->nm_notify callback was the
1862 * standard netmap_notify(), as it is the case for nic adapters.
1863 * Any additional action performed by hwna->nm_notify will not be
1864 * performed by netmap_bwrap_intr_notify.
1866 * Additionally, the bwrap can optionally attach the host rings pair
1867 * of the wrapped adapter to a different port of the switch.
1872 netmap_bwrap_dtor(struct netmap_adapter *na)
1874 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1875 struct netmap_adapter *hwna = bna->hwna;
1878 /* drop reference to hwna->ifp.
1879 * If we don't do this, netmap_detach_common(na)
1880 * will think it has set NA(na->ifp) to NULL
1883 /* for safety, also drop the possible reference
1886 bna->host.up.ifp = NULL;
1888 hwna->nm_mem = bna->save_nmd;
1889 hwna->na_private = NULL;
1890 hwna->na_vp = hwna->na_hostvp = NULL;
1891 hwna->na_flags &= ~NAF_BUSY;
1892 netmap_adapter_put(hwna);
1898 * Intr callback for NICs connected to a bridge.
1899 * Simply ignore tx interrupts (maybe we could try to recover space ?)
1900 * and pass received packets from nic to the bridge.
1902 * XXX TODO check locking: this is called from the interrupt
1903 * handler so we should make sure that the interface is not
1904 * disconnected while passing down an interrupt.
1906 * Note, no user process can access this NIC or the host stack.
1907 * The only part of the ring that is significant are the slots,
1908 * and head/cur/tail are set from the kring as needed
1909 * (part as a receive ring, part as a transmit ring).
1911 * callback that overwrites the hwna notify callback.
1912 * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1913 * The bridge wrapper then sends the packets through the bridge.
1916 netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1918 struct netmap_bwrap_adapter *bna = na->na_private;
1919 struct netmap_vp_adapter *hostna = &bna->host;
1920 struct netmap_kring *kring, *bkring;
1921 struct netmap_ring *ring;
1922 int is_host_ring = ring_nr == na->num_rx_rings;
1923 struct netmap_vp_adapter *vpna = &bna->up;
1927 D("%s %s%d 0x%x", na->name,
1928 (tx == NR_TX ? "TX" : "RX"), ring_nr, flags);
1930 if (flags & NAF_DISABLE_NOTIFY) {
1931 /* the enabled/disabled state of the ring has changed,
1932 * propagate the info to the wrapper (with tx/rx swapped)
1935 netmap_set_rxring(&vpna->up, ring_nr,
1936 na->tx_rings[ring_nr].nkr_stopped);
1938 netmap_set_txring(&vpna->up, ring_nr,
1939 na->rx_rings[ring_nr].nkr_stopped);
1944 if (!nm_netmap_on(na))
1947 /* we only care about receive interrupts */
1951 kring = &na->rx_rings[ring_nr];
1954 /* make sure the ring is not disabled */
1955 if (nm_kr_tryget(kring))
1958 if (is_host_ring && hostna->na_bdg == NULL) {
1959 error = bna->save_notify(na, ring_nr, tx, flags);
1963 /* Here we expect ring->head = ring->cur = ring->tail
1964 * because everything has been released from the previous round.
1965 * However the ring is shared and we might have info from
1966 * the wrong side (the tx ring). Hence we overwrite with
1967 * the info from the rx kring.
1970 D("%s head %d cur %d tail %d (kring %d %d %d)", na->name,
1971 ring->head, ring->cur, ring->tail,
1972 kring->rhead, kring->rcur, kring->rtail);
1974 ring->head = kring->rhead;
1975 ring->cur = kring->rcur;
1976 ring->tail = kring->rtail;
1982 /* simulate a user wakeup on the rx ring */
1983 /* fetch packets that have arrived.
1984 * XXX maybe do this in a loop ?
1986 error = kring->nm_sync(kring, 0);
1989 if (kring->nr_hwcur == kring->nr_hwtail && netmap_verbose) {
1990 D("how strange, interrupt with no packets on %s",
1995 /* new packets are ring->cur to ring->tail, and the bkring
1996 * had hwcur == ring->cur. So advance ring->cur to ring->tail
1997 * to push all packets out.
1999 ring->head = ring->cur = ring->tail;
2001 /* also set tail to what the bwrap expects */
2002 bkring = &vpna->up.tx_rings[ring_nr];
2003 ring->tail = bkring->nr_hwtail; // rtail too ?
2005 /* pass packets to the switch */
2006 nm_txsync_prologue(bkring); // XXX error checking ?
2007 netmap_vp_txsync(bkring, flags);
2009 /* mark all buffers as released on this ring */
2010 ring->head = ring->cur = kring->nr_hwtail;
2011 ring->tail = kring->rtail;
2012 /* another call to actually release the buffers */
2013 if (!is_host_ring) {
2014 error = kring->nm_sync(kring, 0);
2016 /* mark all packets as released, as in the
2017 * second part of netmap_rxsync_from_host()
2019 kring->nr_hwcur = kring->nr_hwtail;
2020 nm_rxsync_finalize(kring);
2029 /* nm_register callback for bwrap */
2031 netmap_bwrap_register(struct netmap_adapter *na, int onoff)
2033 struct netmap_bwrap_adapter *bna =
2034 (struct netmap_bwrap_adapter *)na;
2035 struct netmap_adapter *hwna = bna->hwna;
2036 struct netmap_vp_adapter *hostna = &bna->host;
2039 ND("%s %s", na->name, onoff ? "on" : "off");
2044 /* netmap_do_regif has been called on the bwrap na.
2045 * We need to pass the information about the
2046 * memory allocator down to the hwna before
2047 * putting it in netmap mode
2049 hwna->na_lut = na->na_lut;
2050 hwna->na_lut_objtotal = na->na_lut_objtotal;
2051 hwna->na_lut_objsize = na->na_lut_objsize;
2053 if (hostna->na_bdg) {
2054 /* if the host rings have been attached to switch,
2055 * we need to copy the memory allocator information
2056 * in the hostna also
2058 hostna->up.na_lut = na->na_lut;
2059 hostna->up.na_lut_objtotal = na->na_lut_objtotal;
2060 hostna->up.na_lut_objsize = na->na_lut_objsize;
2063 /* cross-link the netmap rings
2064 * The original number of rings comes from hwna,
2065 * rx rings on one side equals tx rings on the other.
2066 * We need to do this now, after the initialization
2067 * of the kring->ring pointers
2069 for (i = 0; i < na->num_rx_rings + 1; i++) {
2070 hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
2071 hwna->tx_rings[i].ring = na->rx_rings[i].ring;
2073 for (i = 0; i < na->num_tx_rings + 1; i++) {
2074 hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
2075 hwna->rx_rings[i].ring = na->tx_rings[i].ring;
2079 /* forward the request to the hwna */
2080 error = hwna->nm_register(hwna, onoff);
2084 /* impersonate a netmap_vp_adapter */
2085 netmap_vp_reg(na, onoff);
2087 netmap_vp_reg(&hostna->up, onoff);
2090 /* intercept the hwna nm_nofify callback */
2091 bna->save_notify = hwna->nm_notify;
2092 hwna->nm_notify = netmap_bwrap_intr_notify;
2094 hwna->nm_notify = bna->save_notify;
2095 hwna->na_lut = NULL;
2096 hwna->na_lut_objtotal = 0;
2097 hwna->na_lut_objsize = 0;
2103 /* nm_config callback for bwrap */
2105 netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
2106 u_int *rxr, u_int *rxd)
2108 struct netmap_bwrap_adapter *bna =
2109 (struct netmap_bwrap_adapter *)na;
2110 struct netmap_adapter *hwna = bna->hwna;
2112 /* forward the request */
2113 netmap_update_config(hwna);
2114 /* swap the results */
2115 *txr = hwna->num_rx_rings;
2116 *txd = hwna->num_rx_desc;
2117 *rxr = hwna->num_tx_rings;
2118 *rxd = hwna->num_rx_desc;
2124 /* nm_krings_create callback for bwrap */
2126 netmap_bwrap_krings_create(struct netmap_adapter *na)
2128 struct netmap_bwrap_adapter *bna =
2129 (struct netmap_bwrap_adapter *)na;
2130 struct netmap_adapter *hwna = bna->hwna;
2131 struct netmap_adapter *hostna = &bna->host.up;
2136 /* impersonate a netmap_vp_adapter */
2137 error = netmap_vp_krings_create(na);
2141 /* also create the hwna krings */
2142 error = hwna->nm_krings_create(hwna);
2144 netmap_vp_krings_delete(na);
2147 /* the connection between the bwrap krings and the hwna krings
2148 * will be perfomed later, in the nm_register callback, since
2149 * now the kring->ring pointers have not been initialized yet
2152 if (na->na_flags & NAF_HOST_RINGS) {
2153 /* the hostna rings are the host rings of the bwrap.
2154 * The corresponding krings must point back to the
2157 hostna->tx_rings = na->tx_rings + na->num_tx_rings;
2158 hostna->tx_rings[0].na = hostna;
2159 hostna->rx_rings = na->rx_rings + na->num_rx_rings;
2160 hostna->rx_rings[0].na = hostna;
2168 netmap_bwrap_krings_delete(struct netmap_adapter *na)
2170 struct netmap_bwrap_adapter *bna =
2171 (struct netmap_bwrap_adapter *)na;
2172 struct netmap_adapter *hwna = bna->hwna;
2176 hwna->nm_krings_delete(hwna);
2177 netmap_vp_krings_delete(na);
2181 /* notify method for the bridge-->hwna direction */
2183 netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
2185 struct netmap_bwrap_adapter *bna =
2186 (struct netmap_bwrap_adapter *)na;
2187 struct netmap_adapter *hwna = bna->hwna;
2188 struct netmap_kring *kring, *hw_kring;
2189 struct netmap_ring *ring;
2196 kring = &na->rx_rings[ring_n];
2197 hw_kring = &hwna->tx_rings[ring_n];
2199 lim = kring->nkr_num_slots - 1;
2201 if (!nm_netmap_on(hwna))
2203 mtx_lock(&kring->q_lock);
2204 /* first step: simulate a user wakeup on the rx ring */
2205 netmap_vp_rxsync_locked(kring, flags);
2206 ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2208 kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
2209 ring->head, ring->cur, ring->tail,
2210 hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
2211 /* second step: the simulated user consumes all new packets */
2212 ring->head = ring->cur = ring->tail;
2214 /* third step: the new packets are sent on the tx ring
2215 * (which is actually the same ring)
2217 /* set tail to what the hw expects */
2218 ring->tail = hw_kring->rtail;
2219 nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
2220 error = hw_kring->nm_sync(hw_kring, flags);
2222 /* fourth step: now we are back the rx ring */
2223 /* claim ownership on all hw owned bufs */
2224 ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
2225 ring->tail = kring->rtail; /* restore saved value of tail, for safety */
2227 /* fifth step: the user goes to sleep again, causing another rxsync */
2228 netmap_vp_rxsync_locked(kring, flags);
2229 ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
2231 kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
2232 ring->head, ring->cur, ring->tail,
2233 hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
2234 mtx_unlock(&kring->q_lock);
2239 /* notify method for the bridge-->host-rings path */
2241 netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
2243 struct netmap_bwrap_adapter *bna = na->na_private;
2244 struct netmap_adapter *port_na = &bna->up.up;
2245 if (tx == NR_TX || ring_n != 0)
2247 return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
2251 /* nm_bdg_ctl callback for the bwrap.
2252 * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
2253 * On attach, it needs to provide a fake netmap_priv_d structure and
2254 * perform a netmap_do_regif() on the bwrap. This will put both the
2255 * bwrap and the hwna in netmap mode, with the netmap rings shared
2256 * and cross linked. Moroever, it will start intercepting interrupts
2260 netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
2262 struct netmap_priv_d *npriv;
2263 struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
2264 struct netmap_if *nifp;
2268 if (NETMAP_OWNED_BY_ANY(na)) {
2271 if (bna->na_kpriv) {
2275 npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
2278 nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error);
2280 bzero(npriv, sizeof(*npriv));
2281 free(npriv, M_DEVBUF);
2284 bna->na_kpriv = npriv;
2285 na->na_flags |= NAF_BUSY;
2289 if (na->active_fds == 0) /* not registered */
2291 last_instance = netmap_dtor_locked(bna->na_kpriv);
2292 if (!last_instance) {
2293 D("--- error, trying to detach an entry with active mmaps");
2296 struct nm_bridge *b = bna->up.na_bdg,
2297 *bh = bna->host.na_bdg;
2298 npriv = bna->na_kpriv;
2299 bna->na_kpriv = NULL;
2302 bzero(npriv, sizeof(*npriv));
2303 free(npriv, M_DEVBUF);
2305 /* XXX the bwrap dtor should take care
2306 * of this (2014-06-16)
2308 netmap_bdg_detach_common(b, bna->up.bdg_port,
2309 (bh ? bna->host.bdg_port : -1));
2311 na->na_flags &= ~NAF_BUSY;
2318 /* attach a bridge wrapper to the 'real' device */
2320 netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
2322 struct netmap_bwrap_adapter *bna;
2323 struct netmap_adapter *na = NULL;
2324 struct netmap_adapter *hostna = NULL;
2327 /* make sure the NIC is not already in use */
2328 if (NETMAP_OWNED_BY_ANY(hwna)) {
2329 D("NIC %s busy, cannot attach to bridge", hwna->name);
2333 bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
2339 strncpy(na->name, nr_name, sizeof(na->name));
2340 /* fill the ring data for the bwrap adapter with rx/tx meanings
2341 * swapped. The real cross-linking will be done during register,
2342 * when all the krings will have been created.
2344 na->num_rx_rings = hwna->num_tx_rings;
2345 na->num_tx_rings = hwna->num_rx_rings;
2346 na->num_tx_desc = hwna->num_rx_desc;
2347 na->num_rx_desc = hwna->num_tx_desc;
2348 na->nm_dtor = netmap_bwrap_dtor;
2349 na->nm_register = netmap_bwrap_register;
2350 // na->nm_txsync = netmap_bwrap_txsync;
2351 // na->nm_rxsync = netmap_bwrap_rxsync;
2352 na->nm_config = netmap_bwrap_config;
2353 na->nm_krings_create = netmap_bwrap_krings_create;
2354 na->nm_krings_delete = netmap_bwrap_krings_delete;
2355 na->nm_notify = netmap_bwrap_notify;
2356 na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
2357 na->pdev = hwna->pdev;
2358 na->nm_mem = netmap_mem_private_new(na->name,
2359 na->num_tx_rings, na->num_tx_desc,
2360 na->num_rx_rings, na->num_rx_desc,
2362 na->na_flags |= NAF_MEM_OWNER;
2363 if (na->nm_mem == NULL)
2365 bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
2368 netmap_adapter_get(hwna);
2369 hwna->na_private = bna; /* weak reference */
2370 hwna->na_vp = &bna->up;
2372 if (hwna->na_flags & NAF_HOST_RINGS) {
2373 if (hwna->na_flags & NAF_SW_ONLY)
2374 na->na_flags |= NAF_SW_ONLY;
2375 na->na_flags |= NAF_HOST_RINGS;
2376 hostna = &bna->host.up;
2377 snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
2378 hostna->ifp = hwna->ifp;
2379 hostna->num_tx_rings = 1;
2380 hostna->num_tx_desc = hwna->num_rx_desc;
2381 hostna->num_rx_rings = 1;
2382 hostna->num_rx_desc = hwna->num_tx_desc;
2383 // hostna->nm_txsync = netmap_bwrap_host_txsync;
2384 // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
2385 hostna->nm_notify = netmap_bwrap_host_notify;
2386 hostna->nm_mem = na->nm_mem;
2387 hostna->na_private = bna;
2388 hostna->na_vp = &bna->up;
2389 na->na_hostvp = hwna->na_hostvp =
2390 hostna->na_hostvp = &bna->host;
2391 hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
2394 ND("%s<->%s txr %d txd %d rxr %d rxd %d",
2395 na->name, ifp->if_xname,
2396 na->num_tx_rings, na->num_tx_desc,
2397 na->num_rx_rings, na->num_rx_desc);
2399 error = netmap_attach_common(na);
2403 /* make bwrap ifp point to the real ifp
2404 * NOTE: netmap_attach_common() interprets a non-NULL na->ifp
2405 * as a request to make the ifp point to the na. Since we
2406 * do not want to change the na already pointed to by hwna->ifp,
2407 * the following assignment has to be delayed until now
2409 na->ifp = hwna->ifp;
2410 hwna->na_flags |= NAF_BUSY;
2411 /* make hwna point to the allocator we are actually using,
2412 * so that monitors will be able to find it
2414 bna->save_nmd = hwna->nm_mem;
2415 hwna->nm_mem = na->nm_mem;
2419 netmap_mem_private_delete(na->nm_mem);
2421 hwna->na_vp = hwna->na_hostvp = NULL;
2422 netmap_adapter_put(hwna);
2423 free(bna, M_DEVBUF);
2430 netmap_init_bridges(void)
2433 bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
2434 for (i = 0; i < NM_BRIDGES; i++)
2435 BDG_RWINIT(&nm_bridges[i]);
2437 #endif /* WITH_VALE */