2 * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * netmap monitors can be used to do monitoring of network traffic
32 * on another adapter, when the latter adapter is working in netmap mode.
34 * Monitors offer to userspace the same interface as any other netmap port,
35 * with as many pairs of netmap rings as the monitored adapter.
36 * However, only the rx rings are actually used. Each monitor rx ring receives
37 * the traffic transiting on both the tx and rx corresponding rings in the
38 * monitored adapter. During registration, the user can choose if she wants
39 * to intercept tx only, rx only, or both tx and rx traffic.
41 * If the monitor is not able to cope with the stream of frames, excess traffic will be dropped.
44 * If the monitored adapter leaves netmap mode, the monitor has to be restarted.
46 * Monitors can be either zero-copy or copy-based.
48 * Copy monitors see the frames before they are consumed:
50 * - For tx traffic, this is when the application sends them, before they are
51 * passed down to the adapter.
53 * - For rx traffic, this is when they are received by the adapter, before
54 * they are sent up to the application, if any (note that, if no
55 * application is reading from a monitored ring, the ring will eventually
56 * fill up and traffic will stop).
58 * Zero-copy monitors only see the frames after they have been consumed:
60 * - For tx traffic, this is after the slots containing the frames have been
61 * marked as free. Note that this may happen at a considerable delay after
62 * frame transmission, since freeing of slots is often done lazily.
64 * - For rx traffic, this is after the consumer on the monitored adapter
65 * has released them. In most cases, the consumer is a userspace
66 * application which may have modified the frame contents.
68 * Several copy monitors may be active on any ring. Zero-copy monitors,
69 * instead, need exclusive access to each of the monitored rings. This may
70 * change in the future, if we implement zero-copy monitor chaining.
75 #if defined(__FreeBSD__)
76 #include <sys/cdefs.h> /* prerequisite */
78 #include <sys/types.h>
79 #include <sys/errno.h>
80 #include <sys/param.h> /* defines used in kernel.h */
81 #include <sys/kernel.h> /* types used in module initialization */
82 #include <sys/malloc.h>
85 #include <sys/rwlock.h>
86 #include <sys/selinfo.h>
87 #include <sys/sysctl.h>
88 #include <sys/socket.h> /* sockaddrs */
90 #include <net/if_var.h>
91 #include <machine/bus.h> /* bus_dmamap_* */
92 #include <sys/refcount.h>
99 #elif defined(__APPLE__)
101 #warning OSX support is only partial
102 #include "osx_glue.h"
106 #error Unsupported platform
108 #endif /* unsupported */
114 #include <net/netmap.h>
115 #include <dev/netmap/netmap_kern.h>
116 #include <dev/netmap/netmap_mem2.h>
// Upper bound on the slot count of a monitor ring: netmap_get_monitor_na()
// passes it as the maximum to nm_bound_var() when it validates the requested
// num_tx_desc and num_rx_desc values.
120 #define NM_MONITOR_MAXSLOTS 4096
123 ********************************************************************
124 * functions common to both kinds of monitors
125 ********************************************************************
// Visible behaviour: logs the ring name and the flags through RD().
// NOTE(review): the return statement is not part of this excerpt, so the
// error value promised by the comment below cannot be confirmed from here.
128 /* nm_sync callback for the monitor's own tx rings.
129 * This makes no sense and always returns error
132 netmap_monitor_txsync(struct netmap_kring *kring, int flags)
134 RD(1, "%s %x", kring->name, flags);
// Visible behaviour: copies kring->rcur into kring->nr_hwcur, i.e. the
// kernel-side cursor is moved to the position recorded in rcur. No other
// work on the ring is visible in this excerpt.
// NOTE(review): the comment below names netmap_zmon_rxsync as the reader, but
// the function defined here (and installed as nm_rxsync for both monitor
// kinds) is netmap_monitor_rxsync - confirm which name is intended.
138 /* nm_sync callback for the monitor's own rx rings.
139 * Note that the lock in netmap_zmon_parent_sync only protects
140 * writers among themselves. Synchronization between writers
141 * (i.e., netmap_zmon_parent_txsync and netmap_zmon_parent_rxsync)
142 * and readers (i.e., netmap_zmon_rxsync) relies on memory barriers.
145 netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
147 ND("%s %x", kring->name, flags);
148 kring->nr_hwcur = kring->rcur;
// Creates the monitor's krings by delegating to netmap_krings_create() with
// 0 as the second argument, and returns that call's result.
// NOTE(review): presumably the second argument is the extra per-kring space
// that the comment below says monitors do not need - confirm against
// netmap_krings_create().
153 /* nm_krings_create callbacks for monitors.
154 * We could use the default netmap_hw_krings_zmon, but
155 * we don't need the mbq.
158 netmap_monitor_krings_create(struct netmap_adapter *na)
160 return netmap_krings_create(na, 0);
// Counterpart of netmap_monitor_krings_create(): hands the adapter straight
// to netmap_krings_delete(); no monitor-specific teardown is visible here.
163 /* nm_krings_delete callback for monitors */
165 netmap_monitor_krings_delete(struct netmap_adapter *na)
167 netmap_krings_delete(na);
// Maps a ring direction to the matching monitor request flag:
// NR_RX yields NR_MONITOR_RX, any other value yields NR_MONITOR_TX.
// Callers in this file test the result against the nr_flags of the request
// and against the flags saved in the monitor adapter.
172 nm_txrx2flag(enum txrx t)
174 return (t == NR_RX ? NR_MONITOR_RX : NR_MONITOR_TX);
// Grows kring->monitors so that it can hold at least n monitor pointers.
//   kring: the monitored kring that owns the array
//   n:     required capacity, in entries
// The array is only ever grown: when n does not exceed max_monitors nothing
// is reallocated. The new size is n pointers, requested with
// M_NOWAIT | M_ZERO; kring->monitors and kring->max_monitors are updated
// with the new array and capacity.
// NOTE(review): the early return and the handling of a failed realloc() are
// not visible in this excerpt.
177 /* allocate the monitors array in the monitored kring */
179 nm_monitor_alloc(struct netmap_kring *kring, u_int n)
182 struct netmap_kring **nm;
184 if (n <= kring->max_monitors)
185 /* we already have more entries than requested */
188 len = sizeof(struct netmap_kring *) * n;
189 nm = realloc(kring->monitors, len, M_DEVBUF, M_NOWAIT | M_ZERO);
193 kring->monitors = nm;
194 kring->max_monitors = n;
// Releases the array allocated by nm_monitor_alloc() and resets the
// bookkeeping fields (monitors, max_monitors, n_monitors) of the kring.
// Nothing happens when kring->monitors is already NULL. If monitors are
// still registered (n_monitors > 0) a diagnostic is logged with D() before
// the array is freed anyway.
199 /* deallocate the monitors array in the monitored kring */
201 nm_monitor_dealloc(struct netmap_kring *kring)
203 if (kring->monitors) {
204 if (kring->n_monitors > 0) {
205 D("freeing not empty monitor array for %s (%d dangling monitors)!", kring->name,
208 free(kring->monitors, M_DEVBUF);
209 kring->monitors = NULL;
210 kring->max_monitors = 0;
211 kring->n_monitors = 0;
// Forward declarations of the replacement callbacks. netmap_monitor_add()
// installs them on the monitored kring before their definitions appear:
// the zmon pair serves zero-copy monitors, the monitor pair serves copy
// monitors, and the notify replacement is installed next to the rx sync.
216 * monitors work by replacing the nm_sync() and possibly the
217 * nm_notify() callbacks in the monitored rings.
219 static int netmap_zmon_parent_txsync(struct netmap_kring *, int);
220 static int netmap_zmon_parent_rxsync(struct netmap_kring *, int);
221 static int netmap_monitor_parent_txsync(struct netmap_kring *, int);
222 static int netmap_monitor_parent_rxsync(struct netmap_kring *, int);
223 static int netmap_monitor_parent_notify(struct netmap_kring *, int);
// Registers mkring as a monitor of kring.
//   mkring: rx kring of the monitor adapter
//   kring:  kring of the monitored adapter
//   zcopy:  non-zero selects the zero-copy replacement callbacks
// Visible steps:
//  - the monitors array is grown to n_monitors + 1 entries;
//  - mkring is stored in the first unused entry and remembers its index in
//    mon_pos, which netmap_monitor_del() later uses to remove it;
//  - when the count reaches 1, the original nm_sync and nm_notify are saved
//    in mon_sync and mon_notify, and nm_sync is replaced according to the
//    ring direction and to zcopy;
//  - nm_notify is replaced by netmap_monitor_parent_notify and mon_tail is
//    initialised from nr_hwtail.
// NOTE(review): the increment of n_monitors, the conditions guarding the
// notify interception and the error and return paths are not visible in
// this excerpt.
226 /* add the monitor mkring to the list of monitors of kring.
227 * If this is the first monitor, intercept the callbacks
230 netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zcopy)
234 /* synchronize with concurrently running nm_sync()s */
236 /* make sure the monitor array exists and is big enough */
237 error = nm_monitor_alloc(kring, kring->n_monitors + 1);
240 kring->monitors[kring->n_monitors] = mkring;
241 mkring->mon_pos = kring->n_monitors;
243 if (kring->n_monitors == 1) {
244 /* this is the first monitor, intercept callbacks */
245 D("%s: intercept callbacks on %s", mkring->name, kring->name);
246 kring->mon_sync = kring->nm_sync;
247 /* zcopy monitors do not override nm_notify(), but
248 * we save the original one regardless, so that
249 * netmap_monitor_del() does not need to know the
252 kring->mon_notify = kring->nm_notify;
253 if (kring->tx == NR_TX) {
254 kring->nm_sync = (zcopy ? netmap_zmon_parent_txsync :
255 netmap_monitor_parent_txsync);
257 kring->nm_sync = (zcopy ? netmap_zmon_parent_rxsync :
258 netmap_monitor_parent_rxsync);
260 /* also intercept notify */
261 kring->nm_notify = netmap_monitor_parent_notify;
262 kring->mon_tail = kring->nr_hwtail;
// Unregisters mkring from the monitors of kring.
// Removal keeps the array dense: unless mkring already occupies the last
// used entry, the last entry is moved into mkring's position and its
// mon_pos is updated; the vacated last entry is then cleared.
// When no monitor is left, nm_sync is restored from mon_sync, nm_notify is
// restored from mon_notify for rx rings only (kring->tx == NR_RX), and the
// monitors array is released with nm_monitor_dealloc().
// NOTE(review): the decrement of n_monitors that must precede the index
// comparison is not visible in this excerpt.
273 /* remove the monitor mkring from the list of monitors of kring.
274 * If this is the last monitor, restore the original callbacks
277 netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
279 /* synchronize with concurrently running nm_sync()s */
282 if (mkring->mon_pos != kring->n_monitors) {
283 kring->monitors[mkring->mon_pos] = kring->monitors[kring->n_monitors];
284 kring->monitors[mkring->mon_pos]->mon_pos = mkring->mon_pos;
286 kring->monitors[kring->n_monitors] = NULL;
287 if (kring->n_monitors == 0) {
288 /* this was the last monitor, restore callbacks and delete monitor array */
289 D("%s: restoring sync on %s: %p", mkring->name, kring->name, kring->mon_sync);
290 kring->nm_sync = kring->mon_sync;
291 kring->mon_sync = NULL;
292 if (kring->tx == NR_RX) {
293 D("%s: restoring notify on %s: %p",
294 mkring->name, kring->name, kring->mon_notify);
295 kring->nm_notify = kring->mon_notify;
296 kring->mon_notify = NULL;
298 nm_monitor_dealloc(kring);
// Walks every ring of na and, for every monitor registered on it, drops the
// monitor adapter's reference to its parent (netmap_adapter_put() on
// mna->priv.np_na) and clears that pointer.
// NOTE(review): a monitor adapter that watches several rings is reached once
// per ring. After the first visit np_na is already NULL, so later visits
// call netmap_adapter_put() with NULL - confirm that this is tolerated, or
// guard the call with a NULL check.
304 /* This is called when the monitored adapter leaves netmap mode
305 * (see netmap_do_unregif).
306 * We need to notify the monitors that the monitored rings are gone.
307 * We do this by setting their mna->priv.np_na to NULL.
308 * Note that the rings are already stopped when this happens, so
309 * no monitor ring callback can be active.
312 netmap_monitor_stop(struct netmap_adapter *na)
319 for (i = 0; i < nma_get_nrings(na, t); i++) {
320 struct netmap_kring *kring = &NMR(na, t)[i];
323 for (j = 0; j < kring->n_monitors; j++) {
324 struct netmap_kring *mkring =
326 struct netmap_monitor_adapter *mna =
327 (struct netmap_monitor_adapter *)mkring->na;
328 /* forget about this adapter */
329 netmap_adapter_put(mna->priv.np_na);
330 mna->priv.np_na = NULL;
// Shared body of netmap_zmon_reg() and netmap_monitor_reg().
//   na:    the monitor adapter (cast to struct netmap_monitor_adapter)
//   onoff: selects between entering and leaving netmap mode
//   zmon:  forwarded to netmap_monitor_add() as its zcopy argument
// Enter path: for each direction selected in mna->flags, every parent ring
// in [np_qfirst, np_qlast) is paired with the monitor rx ring of the same
// index and registered with netmap_monitor_add(); NAF_NETMAP_ON is then set.
// Leave path: NAF_NETMAP_ON is cleared and the same pairs are unregistered
// with netmap_monitor_del().
// Both tx and rx parent rings feed na->rx_rings[i]; the monitor's tx rings
// are never used as a target.
// NOTE(review): the result of netmap_monitor_add() is discarded here. Its
// allocation step can fail, so a failed registration would go unnoticed -
// confirm whether the error should be propagated and earlier adds undone.
// NOTE(review): the conditions that select the "parent left netmap mode"
// branches are not visible in this excerpt.
337 /* common functions for the nm_register() callbacks of both kinds of
341 netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
343 struct netmap_monitor_adapter *mna =
344 (struct netmap_monitor_adapter *)na;
345 struct netmap_priv_d *priv = &mna->priv;
346 struct netmap_adapter *pna = priv->np_na;
347 struct netmap_kring *kring, *mkring;
351 ND("%p: onoff %d", na, onoff);
354 /* parent left netmap mode, fatal */
355 D("%s: internal error", na->name);
359 if (mna->flags & nm_txrx2flag(t)) {
360 for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
361 kring = &NMR(pna, t)[i];
362 mkring = &na->rx_rings[i];
363 netmap_monitor_add(mkring, kring, zmon);
367 na->na_flags |= NAF_NETMAP_ON;
370 D("%s: parent left netmap mode, nothing to restore", na->name);
373 na->na_flags &= ~NAF_NETMAP_ON;
375 if (mna->flags & nm_txrx2flag(t)) {
376 for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
377 kring = &NMR(pna, t)[i];
378 mkring = &na->rx_rings[i];
379 netmap_monitor_del(mkring, kring);
388 ****************************************************************
389 * functions specific for zero-copy monitors
390 ****************************************************************
// Zero-copy forwarding from a monitored ring to its single monitor
// (kring->monitors[0]); tx selects the direction-specific handling.
// Visible steps:
//  - a NULL monitor entry is only logged through RD();
//  - the released range is delimited by beg and end. In one case it is
//    measured as the movement of nr_hwtail across a call to the saved
//    mon_sync; in the other it starts from nr_hwcur. rel_slots is the
//    distance between them, corrected by nkr_num_slots when it wraps;
//  - under mkring->q_lock the free space of the monitor ring is computed as
//    mlim minus the busy distance between its nr_hwcur and nr_hwtail;
//  - when fewer slots are free than were released, beg is advanced by the
//    difference, so the oldest released slots are skipped and only the
//    newest free_slots ones are transferred;
//  - per transferred slot the buffer index of the monitored slot is stored
//    in the monitor slot and NS_BUF_CHANGED is set on the monitored slot;
//  - the monitor's nr_hwtail is published, the lock is dropped and the
//    monitor is notified through its nm_notify.
// NOTE(review): the branch choosing between the tx and rx handling, the
// remaining slot fields exchanged inside the loop and the return value are
// not visible in this excerpt.
394 * Common function for both zero-copy tx and rx nm_sync()
398 netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
400 struct netmap_kring *mkring = kring->monitors[0];
401 struct netmap_ring *ring = kring->ring, *mring;
403 int rel_slots, free_slots, busy, sent = 0;
405 u_int lim = kring->nkr_num_slots - 1,
406 mlim; // = mkring->nkr_num_slots - 1;
408 if (mkring == NULL) {
409 RD(5, "NULL monitor on %s", kring->name);
412 mring = mkring->ring;
413 mlim = mkring->nkr_num_slots - 1;
415 /* get the released slots (rel_slots) */
417 beg = kring->nr_hwtail;
418 error = kring->mon_sync(kring, flags);
421 end = kring->nr_hwtail;
423 beg = kring->nr_hwcur;
427 rel_slots = end - beg;
429 rel_slots += kring->nkr_num_slots;
432 /* no released slots, but we still need
433 * to call rxsync if this is a rx ring
438 /* we need to lock the monitor receive ring, since it
439 * is the target of both tx and rx traffic from the monitored
442 mtx_lock(&mkring->q_lock);
443 /* get the free slots available on the monitor ring */
444 i = mkring->nr_hwtail;
445 busy = i - mkring->nr_hwcur;
447 busy += mkring->nkr_num_slots;
448 free_slots = mlim - busy;
453 /* swap min(free_slots, rel_slots) slots */
454 if (free_slots < rel_slots) {
455 beg += (rel_slots - free_slots);
456 if (beg >= kring->nkr_num_slots)
457 beg -= kring->nkr_num_slots;
458 rel_slots = free_slots;
462 for ( ; rel_slots; rel_slots--) {
463 struct netmap_slot *s = &ring->slot[beg];
464 struct netmap_slot *ms = &mring->slot[i];
468 ms->buf_idx = s->buf_idx;
470 ND(5, "beg %d buf_idx %d", beg, tmp);
476 s->flags |= NS_BUF_CHANGED;
478 beg = nm_next(beg, lim);
479 i = nm_next(i, mlim);
483 mkring->nr_hwtail = i;
486 mtx_unlock(&mkring->q_lock);
489 /* notify the new frames to the monitor */
490 mkring->nm_notify(mkring, 0);
495 error = kring->mon_sync(kring, flags);
// Thin wrapper: forwards kring and flags to netmap_zmon_parent_sync() with
// the direction fixed to NR_TX and returns its result.
500 /* callback used to replace the nm_sync callback in the monitored tx rings */
502 netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags)
504 ND("%s %x", kring->name, flags);
505 return netmap_zmon_parent_sync(kring, flags, NR_TX);
// Thin wrapper: forwards kring and flags to netmap_zmon_parent_sync() with
// the direction fixed to NR_RX and returns its result.
508 /* callback used to replace the nm_sync callback in the monitored rx rings */
510 netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags)
512 ND("%s %x", kring->name, flags);
513 return netmap_zmon_parent_sync(kring, flags, NR_RX);
// nm_register callback installed for zero-copy monitors: delegates to
// netmap_monitor_reg_common() with the zmon argument set to 1.
518 netmap_zmon_reg(struct netmap_adapter *na, int onoff)
520 return netmap_monitor_reg_common(na, onoff, 1 /* zcopy */);
// Releases the reference to the parent adapter stored in mna->priv.np_na
// through netmap_adapter_put().
// NOTE(review): np_na may already have been cleared by
// netmap_monitor_stop(); any NULL guard around the put is not visible in
// this excerpt.
523 /* nm_dtor callback for monitors */
525 netmap_zmon_dtor(struct netmap_adapter *na)
527 struct netmap_monitor_adapter *mna =
528 (struct netmap_monitor_adapter *)na;
529 struct netmap_priv_d *priv = &mna->priv;
530 struct netmap_adapter *pna = priv->np_na;
532 netmap_adapter_put(pna);
536 ****************************************************************
537 * functions specific for copy monitors
538 ****************************************************************
// Copy-based forwarding: delivers the frames of the monitored ring that
// start at slot first_new (new_slots of them) to every monitor registered
// on kring.
// For each monitor ring, with its q_lock held:
//  - the free space is computed as mlim minus the busy distance between the
//    monitor's nr_hwcur and nr_hwtail;
//  - when fewer slots are free than there are frames to deliver, beg is
//    advanced by the difference, so the oldest frames are skipped;
//  - each frame is copied with memcpy() from the buffer of the monitored
//    slot to the buffer of the monitor slot; a length above the monitor's
//    NETMAP_BUF_SIZE is reported as truncated through RD();
//  - the monitor's nr_hwtail is published, the lock is released and the
//    monitor is notified through its nm_notify.
// NOTE(review): the initialisation of beg and m, the clamping of copy_len
// and the updates of the destination slot are not visible in this excerpt.
542 netmap_monitor_parent_sync(struct netmap_kring *kring, u_int first_new, int new_slots)
546 for (j = 0; j < kring->n_monitors; j++) {
547 struct netmap_kring *mkring = kring->monitors[j];
549 int free_slots, busy, sent = 0, m;
550 u_int lim = kring->nkr_num_slots - 1;
551 struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
552 u_int max_len = NETMAP_BUF_SIZE(mkring->na);
554 mlim = mkring->nkr_num_slots - 1;
556 /* we need to lock the monitor receive ring, since it
557 * is the target of both tx and rx traffic from the monitored
560 mtx_lock(&mkring->q_lock);
561 /* get the free slots available on the monitor ring */
562 i = mkring->nr_hwtail;
563 busy = i - mkring->nr_hwcur;
565 busy += mkring->nkr_num_slots;
566 free_slots = mlim - busy;
571 /* copy min(free_slots, new_slots) slots */
574 if (free_slots < m) {
575 beg += (m - free_slots);
576 if (beg >= kring->nkr_num_slots)
577 beg -= kring->nkr_num_slots;
582 struct netmap_slot *s = &ring->slot[beg];
583 struct netmap_slot *ms = &mring->slot[i];
584 u_int copy_len = s->len;
585 char *src = NMB(kring->na, s),
586 *dst = NMB(mkring->na, ms);
588 if (unlikely(copy_len > max_len)) {
589 RD(5, "%s->%s: truncating %d to %d", kring->name,
590 mkring->name, copy_len, max_len);
594 memcpy(dst, src, copy_len);
598 beg = nm_next(beg, lim);
599 i = nm_next(i, mlim);
602 mkring->nr_hwtail = i;
604 mtx_unlock(&mkring->q_lock);
607 /* notify the new frames to the monitor */
608 mkring->nm_notify(mkring, 0);
// Copy-monitor replacement for the tx nm_sync of a monitored ring.
// The frames queued since the last sync are those between nr_hwcur and
// rhead; the count is corrected by nkr_num_slots when the range wraps.
// They are copied to the monitors first, and only then is the saved
// original callback (mon_sync) invoked; its result is returned.
613 /* callback used to replace the nm_sync callback in the monitored tx rings */
615 netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
620 /* get the new slots */
621 first_new = kring->nr_hwcur;
622 new_slots = kring->rhead - first_new;
624 new_slots += kring->nkr_num_slots;
626 netmap_monitor_parent_sync(kring, first_new, new_slots);
627 return kring->mon_sync(kring, flags);
// Copy-monitor replacement for the rx nm_sync of a monitored ring.
// The saved original callback (mon_sync) runs first, so that nr_hwtail
// reflects newly received frames. The new frames are those between
// mon_tail (the tail seen on the previous pass) and nr_hwtail; the count is
// corrected by nkr_num_slots when the range wraps. After the copy, mon_tail
// is advanced to the current nr_hwtail.
// NOTE(review): the handling of an error from mon_sync and the return value
// are not visible in this excerpt.
630 /* callback used to replace the nm_sync callback in the monitored rx rings */
632 netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
635 int new_slots, error;
637 /* get the new slots */
638 error = kring->mon_sync(kring, flags);
641 first_new = kring->mon_tail;
642 new_slots = kring->nr_hwtail - first_new;
644 new_slots += kring->nkr_num_slots;
646 netmap_monitor_parent_sync(kring, first_new, new_slots);
647 kring->mon_tail = kring->nr_hwtail;
// Replacement nm_notify for a monitored ring: after nm_kr_tryget() on the
// ring, it runs netmap_monitor_parent_rxsync() with NAF_FORCE_READ so that
// the monitors receive the new frames, then chains to the saved original
// callback (mon_notify) and returns its result.
// NOTE(review): the action taken when nm_kr_tryget() fails and the matching
// release of the kring are not visible in this excerpt.
651 /* callback used to replace the nm_notify() callback in the monitored rx rings */
653 netmap_monitor_parent_notify(struct netmap_kring *kring, int flags)
655 ND(5, "%s %x", kring->name, flags);
656 /* ?xsync callbacks have tryget called by their callers
657 * (NIOCREGIF and poll()), but here we have to call it
660 if (nm_kr_tryget(kring))
662 netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
665 return kring->mon_notify(kring, flags);
// nm_register callback installed for copy monitors: delegates to
// netmap_monitor_reg_common() with the zmon argument set to 0.
670 netmap_monitor_reg(struct netmap_adapter *na, int onoff)
672 return netmap_monitor_reg_common(na, onoff, 0 /* no zcopy */);
// nm_dtor callback installed for copy monitors: releases the reference to
// the parent adapter stored in mna->priv.np_na through netmap_adapter_put().
// The visible body is the same as that of netmap_zmon_dtor().
// NOTE(review): np_na may already have been cleared by
// netmap_monitor_stop(); any NULL guard around the put is not visible in
// this excerpt.
676 netmap_monitor_dtor(struct netmap_adapter *na)
678 struct netmap_monitor_adapter *mna =
679 (struct netmap_monitor_adapter *)na;
680 struct netmap_priv_d *priv = &mna->priv;
681 struct netmap_adapter *pna = priv->np_na;
683 netmap_adapter_put(pna);
// Builds a monitor adapter for a request that carries NR_MONITOR_TX and/or
// NR_MONITOR_RX in nr_flags.
//   nmr:    the request; on success its ring and slot counts are overwritten
//           with the configuration of the new monitor
//   na:     receives the new adapter, with a reference taken on it
//   create: forwarded to netmap_get_na() for the parent lookup
// Visible steps:
//  - the monitor adapter is allocated zeroed with M_NOWAIT;
//  - the parent is looked up with a copy of the request that has the two
//    monitor flags cleared, and must already be in netmap mode;
//  - netmap_interp_ringid() resolves the monitored ring ranges into
//    mna->priv, and the adapter name is composed from the parent name, an
//    optional "-<ring>" suffix and the "r" and "t" direction letters;
//  - zero-copy monitors are checked against rings that already have a
//    monitor, and share nm_mem and na_lut with the parent;
//  - copy monitors are checked against rings whose first monitor is a
//    zero-copy one (recognised by its nm_register callback);
//  - callbacks, ring counts and slot counts are filled in. The monitor gets
//    as many rx rings as the larger of the parent's tx and rx ring counts,
//    and slot counts are bounded by NM_MONITOR_MAXSLOTS, defaulting to the
//    parent's values;
//  - netmap_attach_common() completes the adapter, and the monitored
//    directions are remembered in mna->flags.
// NOTE(review): nm_rxsync is assigned netmap_monitor_rxsync twice below;
// one of the two assignments looks redundant.
// NOTE(review): the literal 10 passed to snprintf() for monsuff matches the
// declared array size; sizeof(monsuff) would keep the two in step.
// NOTE(review): the early returns, the error labels and the assignment of
// the result to the na argument are not visible in this excerpt.
687 /* check if nmr is a request for a monitor adapter that we can satisfy */
689 netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
692 struct netmap_adapter *pna; /* parent adapter */
693 struct netmap_monitor_adapter *mna;
696 int zcopy = (nmr->nr_flags & NR_ZCOPY_MON);
697 char monsuff[10] = "";
699 if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
703 /* this is a request for a monitor adapter */
705 D("flags %x", nmr->nr_flags);
707 mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
713 /* first, try to find the adapter that we want to monitor
714 * We use the same nmr, after we have turned off the monitor flags.
715 * In this way we can potentially monitor everything netmap understands,
716 * except other monitors.
718 memcpy(&pnmr, nmr, sizeof(pnmr));
719 pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX);
720 error = netmap_get_na(&pnmr, &pna, create);
722 D("parent lookup failed: %d", error);
725 D("found parent: %s", pna->name);
727 if (!nm_netmap_on(pna)) {
728 /* parent not in netmap mode */
729 /* XXX we can wait for the parent to enter netmap mode,
730 * by intercepting its nm_register callback (2014-03-16)
732 D("%s not in netmap mode", pna->name);
737 /* grab all the rings we need in the parent */
738 mna->priv.np_na = pna;
739 error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags);
744 if (mna->priv.np_qlast[NR_TX] - mna->priv.np_qfirst[NR_TX] == 1) {
745 snprintf(monsuff, 10, "-%d", mna->priv.np_qfirst[NR_TX]);
747 snprintf(mna->up.name, sizeof(mna->up.name), "%s%s/%s%s%s", pna->name,
750 (nmr->nr_flags & NR_MONITOR_RX) ? "r" : "",
751 (nmr->nr_flags & NR_MONITOR_TX) ? "t" : "");
754 /* zero copy monitors need exclusive access to the monitored rings */
756 if (! (nmr->nr_flags & nm_txrx2flag(t)))
758 for (i = mna->priv.np_qfirst[t]; i < mna->priv.np_qlast[t]; i++) {
759 struct netmap_kring *kring = &NMR(pna, t)[i];
760 if (kring->n_monitors > 0) {
762 D("ring %s already monitored by %s", kring->name,
763 kring->monitors[0]->name);
768 mna->up.nm_register = netmap_zmon_reg;
769 mna->up.nm_dtor = netmap_zmon_dtor;
770 /* to have zero copy, we need to use the same memory allocator
771 * as the monitored port
773 mna->up.nm_mem = pna->nm_mem;
774 mna->up.na_lut = pna->na_lut;
776 /* normal monitors are incompatible with zero copy ones */
778 if (! (nmr->nr_flags & nm_txrx2flag(t)))
780 for (i = mna->priv.np_qfirst[t]; i < mna->priv.np_qlast[t]; i++) {
781 struct netmap_kring *kring = &NMR(pna, t)[i];
782 if (kring->n_monitors > 0 &&
783 kring->monitors[0]->na->nm_register == netmap_zmon_reg)
791 mna->up.nm_rxsync = netmap_monitor_rxsync;
792 mna->up.nm_register = netmap_monitor_reg;
793 mna->up.nm_dtor = netmap_monitor_dtor;
796 /* the monitor supports the host rings iff the parent does */
797 mna->up.na_flags = (pna->na_flags & NAF_HOST_RINGS);
798 /* a do-nothing txsync: monitors cannot be used to inject packets */
799 mna->up.nm_txsync = netmap_monitor_txsync;
800 mna->up.nm_rxsync = netmap_monitor_rxsync;
801 mna->up.nm_krings_create = netmap_monitor_krings_create;
802 mna->up.nm_krings_delete = netmap_monitor_krings_delete;
803 mna->up.num_tx_rings = 1; // XXX we don't need it, but field can't be zero
804 /* we set the number of our rx_rings to be max(num_tx_rings, num_rx_rings)
807 mna->up.num_rx_rings = pna->num_rx_rings;
808 if (pna->num_tx_rings > pna->num_rx_rings)
809 mna->up.num_rx_rings = pna->num_tx_rings;
810 /* by default, the number of slots is the same as in
811 * the parent rings, but the user may ask for a different
814 mna->up.num_tx_desc = nmr->nr_tx_slots;
815 nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
816 1, NM_MONITOR_MAXSLOTS, NULL);
817 mna->up.num_rx_desc = nmr->nr_rx_slots;
818 nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
819 1, NM_MONITOR_MAXSLOTS, NULL);
820 error = netmap_attach_common(&mna->up);
822 D("attach_common error");
826 /* remember the traffic directions we have to monitor */
827 mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX));
830 netmap_adapter_get(*na);
832 /* write the configuration back */
833 nmr->nr_tx_rings = mna->up.num_tx_rings;
834 nmr->nr_rx_rings = mna->up.num_rx_rings;
835 nmr->nr_tx_slots = mna->up.num_tx_desc;
836 nmr->nr_rx_slots = mna->up.num_rx_desc;
838 /* keep the reference to the parent */
844 netmap_adapter_put(pna);
850 #endif /* WITH_MONITOR */