2 * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * netmap monitors can be used to do zero-copy monitoring of network traffic
32 * on another adapter, when the latter adapter is working in netmap mode.
34 * Monitors offer to userspace the same interface as any other netmap port,
35 * with as many pairs of netmap rings as the monitored adapter.
36 * However, only the rx rings are actually used. Each monitor rx ring receives
37 * the traffic transiting on both the tx and rx corresponding rings in the
38 * monitored adapter. During registration, the user can choose if she wants
39 * to intercept tx only, rx only, or both tx and rx traffic.
41 * The monitor only sees the frames after they have been consumed in the
44 * - For tx traffic, this is after the slots containing the frames have been
45 * marked as free. Note that this may happen at a considerable delay after
46 * frame transmission, since freeing of slots is often done lazily.
48 * - For rx traffic, this is after the consumer on the monitored adapter
49 * has released them. In most cases, the consumer is a userspace
50 * application which may have modified the frame contents.
52 * If the monitor is not able to cope with the stream of frames, excess traffic
55 * Each ring can be monitored by at most one monitor. This may change in the
56 * future, if we implement monitor chaining.
61 #if defined(__FreeBSD__)
62 #include <sys/cdefs.h> /* prerequisite */
64 #include <sys/types.h>
65 #include <sys/errno.h>
66 #include <sys/param.h> /* defines used in kernel.h */
67 #include <sys/kernel.h> /* types used in module initialization */
68 #include <sys/malloc.h>
71 #include <sys/rwlock.h>
72 #include <sys/selinfo.h>
73 #include <sys/sysctl.h>
74 #include <sys/socket.h> /* sockaddrs */
76 #include <net/if_var.h>
77 #include <machine/bus.h> /* bus_dmamap_* */
78 #include <sys/refcount.h>
85 #elif defined(__APPLE__)
87 #warning OSX support is only partial
92 #error Unsupported platform
94 #endif /* unsupported */
100 #include <net/netmap.h>
101 #include <dev/netmap/netmap_kern.h>
102 #include <dev/netmap/netmap_mem2.h>
106 #define NM_MONITOR_MAXSLOTS 4096
108 /* monitor works by replacing the nm_sync callbacks in the monitored rings.
109 * The actions to be performed are the same on both tx and rx rings, so we
110 * have collected them here.
 * kring is the monitored ring and flags is handed unchanged to the saved
 * callback (kring->save_sync). The released slots are passed to the rx ring
 * of the monitor that has the same ring_id as kring.
 * NOTE(review): the statements that use ringptr are not visible here; the two
 * wrappers pass &kring->nr_hwtail (tx) and &kring->rcur (rx).
113 netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr)
115 struct netmap_monitor_adapter *mna = kring->monitor;
116 struct netmap_kring *mkring = &mna->up.rx_rings[kring->ring_id];
117 struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
119 int rel_slots, free_slots, busy;
121 u_int lim = kring->nkr_num_slots - 1,
122 mlim = mkring->nkr_num_slots - 1;
124 /* get the released slots (rel_slots) */
126 error = kring->save_sync(kring, flags);
130 rel_slots = end - beg;
132 rel_slots += kring->nkr_num_slots;
138 /* we need to lock the monitor receive ring, since it
139 * is the target of both tx and rx traffic from the monitored
142 mtx_lock(&mkring->q_lock);
143 /* get the free slots available on the monitor ring */
144 i = mkring->nr_hwtail;
145 busy = i - mkring->nr_hwcur;
147 busy += mkring->nkr_num_slots;
148 free_slots = mlim - busy;
151 mtx_unlock(&mkring->q_lock);
155 /* swap min(free_slots, rel_slots) slots */
156 if (free_slots < rel_slots) {
/* not enough room on the monitor ring: move beg forward so that only
 * the last free_slots released slots are passed on
 */
157 beg += (rel_slots - free_slots);
160 rel_slots = free_slots;
163 for ( ; rel_slots; rel_slots--) {
164 struct netmap_slot *s = &ring->slot[beg];
165 struct netmap_slot *ms = &mring->slot[i];
/* the monitor slot takes over the buffer index of the monitored slot */
169 ms->buf_idx = s->buf_idx;
176 s->flags |= NS_BUF_CHANGED;
178 beg = nm_next(beg, lim);
179 i = nm_next(i, mlim);
/* i is now one past the last slot filled above: make it the new tail */
183 mkring->nr_hwtail = i;
185 mtx_unlock(&mkring->q_lock);
186 /* notify the new frames to the monitor */
187 mna->up.nm_notify(&mna->up, mkring->ring_id, NR_RX, 0);
191 /* callback used to replace the nm_sync callback in the monitored tx rings.
 * It forwards to netmap_monitor_parent_sync(), passing the address of
 * kring->nr_hwtail as ringptr, and returns whatever that call returns.
 */
193 netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
195 ND("%s %x", kring->name, flags);
196 return netmap_monitor_parent_sync(kring, flags, &kring->nr_hwtail);
199 /* callback used to replace the nm_sync callback in the monitored rx rings.
 * It forwards to netmap_monitor_parent_sync(), passing the address of
 * kring->rcur as ringptr, and returns whatever that call returns.
 */
201 netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
203 ND("%s %x", kring->name, flags);
204 return netmap_monitor_parent_sync(kring, flags, &kring->rcur);
207 /* nm_sync callback for the monitor's own tx rings.
208 * This makes no sense and always returns error
211 netmap_monitor_txsync(struct netmap_kring *kring, int flags)
// NOTE(review): D() is presumably a logging macro (defined outside this file);
// it is given the ring name and the sync flags of the unexpected call.
213 D("%s %x", kring->name, flags);
217 /* nm_sync callback for the monitor's own rx rings.
218 * Note that the lock in netmap_monitor_parent_sync only protects
219 * writers among themselves. Synchronization between writers
220 * (i.e., netmap_monitor_parent_txsync and netmap_monitor_parent_rxsync)
221 * and readers (i.e., netmap_monitor_rxsync) relies on memory barriers.
224 netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
226 ND("%s %x", kring->name, flags);
// nr_hwcur catches up with rcur; netmap_monitor_parent_sync derives its busy
// count from nr_hwtail - nr_hwcur, so this hands slots back to the writers.
227 kring->nr_hwcur = kring->rcur;
229 nm_rxsync_finalize(kring);
233 /* nm_krings_create callbacks for monitors.
234 * We could use the default netmap_hw_krings_monitor, but
235 * we don't need the mbq.
238 netmap_monitor_krings_create(struct netmap_adapter *na)
// Plain delegation; the result of netmap_krings_create is returned as is.
// NOTE(review): the meaning of the second argument (0) is not visible in
// this file - confirm against the declaration of netmap_krings_create.
240 return netmap_krings_create(na, 0);
244 /* nm_register callback for monitors.
246 * On registration, replace the nm_sync callbacks in the monitored
247 * rings with our own, saving the previous ones in the monitored
248 * rings themselves, where they are used by netmap_monitor_parent_sync.
250 * On de-registration, restore the original callbacks. We need to
251 * stop traffic while we are doing this, since the monitored adapter may
252 * have already started executing a netmap_monitor_parent_sync
253 * and may not like the kring->save_sync pointer to become NULL.
 *
 * na is the monitor adapter (a struct netmap_monitor_adapter); the parent
 * adapter is reached through mna->priv.np_na. Only the parent rings in
 * [np_txqfirst, np_txqlast) and [np_rxqfirst, np_rxqlast) are touched, and
 * only for the directions selected in mna->flags.
256 netmap_monitor_reg(struct netmap_adapter *na, int onoff)
258 struct netmap_monitor_adapter *mna =
259 (struct netmap_monitor_adapter *)na;
260 struct netmap_priv_d *priv = &mna->priv;
261 struct netmap_adapter *pna = priv->np_na;
262 struct netmap_kring *kring;
265 ND("%p: onoff %d", na, onoff);
267 if (!nm_netmap_on(pna)) {
268 /* parent left netmap mode, fatal */
271 if (mna->flags & NR_MONITOR_TX) {
272 for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
273 kring = &pna->tx_rings[i];
274 kring->save_sync = kring->nm_sync;
275 kring->nm_sync = netmap_monitor_parent_txsync;
278 if (mna->flags & NR_MONITOR_RX) {
279 for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
280 kring = &pna->rx_rings[i];
281 kring->save_sync = kring->nm_sync;
282 kring->nm_sync = netmap_monitor_parent_rxsync;
/* callbacks are in place: flag the monitor adapter with NAF_NETMAP_ON */
285 na->na_flags |= NAF_NETMAP_ON;
287 if (!nm_netmap_on(pna)) {
288 /* parent left netmap mode, nothing to restore */
291 na->na_flags &= ~NAF_NETMAP_ON;
292 if (mna->flags & NR_MONITOR_TX) {
293 for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
/* stop the ring, put the saved callback back, then re-enable the ring */
294 netmap_set_txring(pna, i, 1 /* stopped */);
295 kring = &pna->tx_rings[i];
296 kring->nm_sync = kring->save_sync;
297 kring->save_sync = NULL;
298 netmap_set_txring(pna, i, 0 /* enabled */);
301 if (mna->flags & NR_MONITOR_RX) {
302 for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
303 netmap_set_rxring(pna, i, 1 /* stopped */);
304 kring = &pna->rx_rings[i];
305 kring->nm_sync = kring->save_sync;
306 kring->save_sync = NULL;
307 netmap_set_rxring(pna, i, 0 /* enabled */);
313 /* nm_krings_delete callback for monitors: forwards na to netmap_krings_delete() */
315 netmap_monitor_krings_delete(struct netmap_adapter *na)
317 netmap_krings_delete(na);
321 /* nm_dtor callback for monitors.
 * If the parent adapter (mna->priv.np_na) is still in netmap mode, the
 * monitor pointer of every parent ring in the ranges recorded in priv is
 * reset to NULL, for the directions selected in mna->flags.
 * netmap_adapter_put() is also called on the parent.
 */
323 netmap_monitor_dtor(struct netmap_adapter *na)
325 struct netmap_monitor_adapter *mna =
326 (struct netmap_monitor_adapter *)na;
327 struct netmap_priv_d *priv = &mna->priv;
328 struct netmap_adapter *pna = priv->np_na;
332 if (nm_netmap_on(pna)) {
333 /* parent still in netmap mode, mark its krings as free */
334 if (mna->flags & NR_MONITOR_TX) {
335 for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
336 pna->tx_rings[i].monitor = NULL;
339 if (mna->flags & NR_MONITOR_RX) {
340 for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
341 pna->rx_rings[i].monitor = NULL;
345 netmap_adapter_put(pna);
349 /* check if nmr is a request for a monitor adapter that we can satisfy.
 * A request is a monitor request when NR_MONITOR_TX and/or NR_MONITOR_RX
 * is set in nmr->nr_flags. The parent adapter is looked up by calling
 * netmap_get_na() on a copy of nmr with those two flags cleared. The
 * parent rings selected by nr_ringid and nr_flags are claimed by pointing
 * their monitor field at the new adapter. Once the adapter is set up, its
 * ring and slot counts are written back into nmr.
 */
351 netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
354 struct netmap_adapter *pna; /* parent adapter */
355 struct netmap_monitor_adapter *mna;
358 if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
362 /* this is a request for a monitor adapter */
364 D("flags %x", nmr->nr_flags);
366 mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
372 /* first, try to find the adapter that we want to monitor
373 * We use the same nmr, after we have turned off the monitor flags.
374 * In this way we can potentially monitor everything netmap understands,
375 * except other monitors.
377 memcpy(&pnmr, nmr, sizeof(pnmr));
378 pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX);
379 error = netmap_get_na(&pnmr, &pna, create);
381 D("parent lookup failed: %d", error);
384 D("found parent: %s", pna->name);
386 if (!nm_netmap_on(pna)) {
387 /* parent not in netmap mode */
388 /* XXX we can wait for the parent to enter netmap mode,
389 * by intercepting its nm_register callback (2014-03-16)
391 D("%s not in netmap mode", pna->name);
396 /* grab all the rings we need in the parent */
397 mna->priv.np_na = pna;
398 error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags);
403 if (nmr->nr_flags & NR_MONITOR_TX) {
404 for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) {
405 struct netmap_kring *kring = &pna->tx_rings[i];
/* a non-NULL monitor pointer means the ring is already claimed */
406 if (kring->monitor) {
411 kring->monitor = mna;
414 if (nmr->nr_flags & NR_MONITOR_RX) {
415 for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) {
416 struct netmap_kring *kring = &pna->rx_rings[i];
417 if (kring->monitor) {
422 kring->monitor = mna;
426 snprintf(mna->up.name, sizeof(mna->up.name), "mon:%s", pna->name);
428 /* the monitor supports the host rings iff the parent does */
429 mna->up.na_flags = (pna->na_flags & NAF_HOST_RINGS);
430 mna->up.nm_txsync = netmap_monitor_txsync;
431 mna->up.nm_rxsync = netmap_monitor_rxsync;
432 mna->up.nm_register = netmap_monitor_reg;
433 mna->up.nm_dtor = netmap_monitor_dtor;
434 mna->up.nm_krings_create = netmap_monitor_krings_create;
435 mna->up.nm_krings_delete = netmap_monitor_krings_delete;
436 mna->up.nm_mem = pna->nm_mem;
437 mna->up.na_lut = pna->na_lut;
438 mna->up.na_lut_objtotal = pna->na_lut_objtotal;
439 mna->up.na_lut_objsize = pna->na_lut_objsize;
441 mna->up.num_tx_rings = 1; // XXX we don't need it, but field can't be zero
442 /* we set the number of our rx_rings to be max(num_tx_rings, num_rx_rings)
445 mna->up.num_rx_rings = pna->num_rx_rings;
446 if (pna->num_tx_rings > pna->num_rx_rings)
447 mna->up.num_rx_rings = pna->num_tx_rings;
448 /* by default, the number of slots is the same as in
449 * the parent rings, but the user may ask for a different
452 mna->up.num_tx_desc = nmr->nr_tx_slots;
453 nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
454 1, NM_MONITOR_MAXSLOTS, NULL);
455 mna->up.num_rx_desc = nmr->nr_rx_slots;
456 nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
457 1, NM_MONITOR_MAXSLOTS, NULL);
458 error = netmap_attach_common(&mna->up);
460 D("attach_common error");
464 /* remember the traffic directions we have to monitor */
465 mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX));
468 netmap_adapter_get(*na);
470 /* write the configuration back */
471 nmr->nr_tx_rings = mna->up.num_tx_rings;
472 nmr->nr_rx_rings = mna->up.num_rx_rings;
473 nmr->nr_tx_slots = mna->up.num_tx_desc;
474 nmr->nr_rx_slots = mna->up.num_rx_desc;
476 /* keep the reference to the parent */
/* undo the claims: reset only the rings whose monitor pointer is mna,
 * leaving rings owned by any other monitor untouched
 */
483 for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) {
484 if (pna->tx_rings[i].monitor == mna)
485 pna->tx_rings[i].monitor = NULL;
487 for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) {
488 if (pna->rx_rings[i].monitor == mna)
489 pna->rx_rings[i].monitor = NULL;
492 netmap_adapter_put(pna);
498 #endif /* WITH_MONITOR */