From 343391c0e1976dd231ed3553b508b6a49175bf48 Mon Sep 17 00:00:00 2001 From: luigi Date: Fri, 10 May 2013 16:16:33 +0000 Subject: [PATCH] MFC: sync the version of netmap with the one in HEAD, including device drivers (mostly simplifying the code in the interrupt handlers). On passing, also merge r250414, which is related to netmap and the use of lem/em in virtual machines. git-svn-id: svn://svn.freebsd.org/base/stable/9@250458 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- README | 2 +- sys/dev/e1000/if_em.c | 52 +- sys/dev/e1000/if_em.h | 1 + sys/dev/e1000/if_igb.c | 27 +- sys/dev/e1000/if_lem.c | 45 +- sys/dev/e1000/if_lem.h | 1 + sys/dev/ixgbe/ixgbe.c | 27 +- sys/dev/netmap/if_em_netmap.h | 3 +- sys/dev/netmap/if_igb_netmap.h | 1 - sys/dev/netmap/if_lem_netmap.h | 1 - sys/dev/netmap/if_re_netmap.h | 7 +- sys/dev/netmap/ixgbe_netmap.h | 9 +- sys/dev/netmap/netmap.c | 116 ++- sys/dev/netmap/netmap_kern.h | 45 +- sys/dev/netmap/netmap_mem2.c | 70 +- sys/dev/re/if_re.c | 19 +- sys/net/netmap.h | 1 - sys/net/netmap_user.h | 1 - tools/tools/netmap/Makefile | 9 +- tools/tools/netmap/README | 1 + tools/tools/netmap/bridge.c | 235 +----- tools/tools/netmap/nm_util.c | 251 +++++++ tools/tools/netmap/nm_util.h | 183 +++++ tools/tools/netmap/pcap.c | 337 +++------ tools/tools/netmap/pkt-gen.c | 1282 ++++++++++++++++++++------------ 25 files changed, 1576 insertions(+), 1150 deletions(-) create mode 100644 tools/tools/netmap/nm_util.c create mode 100644 tools/tools/netmap/nm_util.h diff --git a/README b/README index 4e49142a9..65b1bf7c5 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ -This is the top level of the FreeBSD source directory. 
This file +$FreeBSD$ was last revised on: $FreeBSD$ diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 64be42dc0..9e83b31b4 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -335,6 +335,9 @@ MODULE_DEPEND(em, ether, 1, 1, 1); #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define M_TSO_LEN 66 +#define MAX_INTS_PER_SEC 8000 +#define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) + /* Allow common code without TSO */ #ifndef CSUM_TSO #define CSUM_TSO 0 @@ -570,6 +573,11 @@ em_attach(device_t dev) &adapter->tx_abs_int_delay, E1000_REGISTER(hw, E1000_TADV), em_tx_abs_int_delay_dflt); + em_add_int_delay_sysctl(adapter, "itr", + "interrupt delay limit in usecs/4", + &adapter->tx_itr, + E1000_REGISTER(hw, E1000_ITR), + DEFAULT_ITR); /* Sysctl for limiting the amount of work done in the taskqueue */ em_set_sysctl_value(adapter, "rx_processing_limit", @@ -3803,17 +3811,9 @@ em_txeof(struct tx_ring *txr) EM_TX_LOCK_ASSERT(txr); #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(ifp); - - selwakeuppri(&na->tx_rings[txr->me].si, PI_NET); - EM_TX_UNLOCK(txr); - EM_CORE_LOCK(adapter); - selwakeuppri(&na->tx_si, PI_NET); - EM_CORE_UNLOCK(adapter); - EM_TX_LOCK(txr); + if (netmap_tx_irq(ifp, txr->me | + (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT))) return; - } #endif /* DEV_NETMAP */ /* No work, make sure watchdog is off */ @@ -4254,8 +4254,6 @@ em_free_receive_buffers(struct rx_ring *rxr) * Enable receive unit. 
* **********************************************************************/ -#define MAX_INTS_PER_SEC 8000 -#define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256) static void em_initialize_receive_unit(struct adapter *adapter) @@ -4315,6 +4313,8 @@ em_initialize_receive_unit(struct adapter *adapter) for (int i = 0; i < adapter->num_queues; i++, rxr++) { /* Setup the Base and Length of the Rx Descriptor Ring */ + u32 rdt = adapter->num_rx_desc - 1; /* default */ + bus_addr = rxr->rxdma.dma_paddr; E1000_WRITE_REG(hw, E1000_RDLEN(i), adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); @@ -4326,18 +4326,11 @@ em_initialize_receive_unit(struct adapter *adapter) /* * an init() while a netmap client is active must * preserve the rx buffers passed to userspace. - * In this driver it means we adjust RDT to - * something different from na->num_rx_desc - 1. */ - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_kring *kring = &na->rx_rings[i]; - int t = na->num_rx_desc - 1 - kring->nr_hwavail; - - E1000_WRITE_REG(hw, E1000_RDT(i), t); - } else + if (ifp->if_capenable & IFCAP_NETMAP) + rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail; #endif /* DEV_NETMAP */ - E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1); + E1000_WRITE_REG(hw, E1000_RDT(i), rdt); } /* Set PTHRESH for improved jumbo performance */ @@ -4414,17 +4407,8 @@ em_rxeof(struct rx_ring *rxr, int count, int *done) EM_RX_LOCK(rxr); #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(ifp); - - na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR; - selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET); - EM_RX_UNLOCK(rxr); - EM_CORE_LOCK(adapter); - selwakeuppri(&na->rx_si, PI_NET); - EM_CORE_UNLOCK(adapter); - return (0); - } + if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed)) + return (FALSE); #endif /* DEV_NETMAP */ for (i = rxr->next_to_check, processed = 0; count != 0;) { @@ -5622,6 +5606,8 @@ 
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); + if (info->offset == E1000_ITR) /* units are 256ns here */ + ticks *= 4; adapter = info->adapter; diff --git a/sys/dev/e1000/if_em.h b/sys/dev/e1000/if_em.h index f1afbadb3..09d81d250 100644 --- a/sys/dev/e1000/if_em.h +++ b/sys/dev/e1000/if_em.h @@ -429,6 +429,7 @@ struct adapter { struct em_int_delay_info tx_abs_int_delay; struct em_int_delay_info rx_int_delay; struct em_int_delay_info rx_abs_int_delay; + struct em_int_delay_info tx_itr; /* Misc stats maintained by the driver */ unsigned long dropped_pkts; diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c index 993952a2b..5b311199e 100644 --- a/sys/dev/e1000/if_igb.c +++ b/sys/dev/e1000/if_igb.c @@ -3872,17 +3872,9 @@ igb_txeof(struct tx_ring *txr) IGB_TX_LOCK_ASSERT(txr); #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(ifp); - - selwakeuppri(&na->tx_rings[txr->me].si, PI_NET); - IGB_TX_UNLOCK(txr); - IGB_CORE_LOCK(adapter); - selwakeuppri(&na->tx_si, PI_NET); - IGB_CORE_UNLOCK(adapter); - IGB_TX_LOCK(txr); - return FALSE; - } + if (netmap_tx_irq(ifp, txr->me | + (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT))) + return (FALSE); #endif /* DEV_NETMAP */ if (txr->tx_avail == adapter->num_tx_desc) { txr->queue_status = IGB_QUEUE_IDLE; @@ -4736,17 +4728,8 @@ igb_rxeof(struct igb_queue *que, int count, int *done) BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(ifp); - - na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR; - selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET); - IGB_RX_UNLOCK(rxr); - IGB_CORE_LOCK(adapter); - selwakeuppri(&na->rx_si, PI_NET); - IGB_CORE_UNLOCK(adapter); - return (0); - } + if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed)) + return (FALSE); #endif /* DEV_NETMAP */ /* Main clean loop */ diff --git a/sys/dev/e1000/if_lem.c 
b/sys/dev/e1000/if_lem.c index b7d2359cb..fbf61e74f 100644 --- a/sys/dev/e1000/if_lem.c +++ b/sys/dev/e1000/if_lem.c @@ -281,6 +281,9 @@ MODULE_DEPEND(lem, ether, 1, 1, 1); #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) +#define MAX_INTS_PER_SEC 8000 +#define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) + static int lem_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); static int lem_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); @@ -442,6 +445,11 @@ lem_attach(device_t dev) &adapter->tx_abs_int_delay, E1000_REGISTER(&adapter->hw, E1000_TADV), lem_tx_abs_int_delay_dflt); + lem_add_int_delay_sysctl(adapter, "itr", + "interrupt delay limit in usecs/4", + &adapter->tx_itr, + E1000_REGISTER(&adapter->hw, E1000_ITR), + DEFAULT_ITR); } /* Sysctls for limiting the amount of work done in the taskqueue */ @@ -1337,12 +1345,16 @@ lem_handle_rxtx(void *context, int pending) if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - lem_rxeof(adapter, adapter->rx_process_limit, NULL); + bool more = lem_rxeof(adapter, adapter->rx_process_limit, NULL); EM_TX_LOCK(adapter); lem_txeof(adapter); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) lem_start_locked(ifp); EM_TX_UNLOCK(adapter); + if (more) { + taskqueue_enqueue(adapter->tq, &adapter->rxtx_task); + return; + } } if (ifp->if_drv_flags & IFF_DRV_RUNNING) @@ -2955,10 +2967,8 @@ lem_txeof(struct adapter *adapter) EM_TX_LOCK_ASSERT(adapter); #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - selwakeuppri(&NA(ifp)->tx_rings[0].si, PI_NET); + if (netmap_tx_irq(ifp, 0 | (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT))) return; - } #endif /* DEV_NETMAP */ if (adapter->num_tx_desc_avail == adapter->num_tx_desc) return; @@ -3246,8 +3256,6 @@ lem_setup_receive_structures(struct adapter *adapter) * Enable receive unit. 
* **********************************************************************/ -#define MAX_INTS_PER_SEC 8000 -#define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256) static void lem_initialize_receive_unit(struct adapter *adapter) @@ -3338,19 +3346,13 @@ lem_initialize_receive_unit(struct adapter *adapter) * Tail Descriptor Pointers */ E1000_WRITE_REG(&adapter->hw, E1000_RDH(0), 0); + rctl = adapter->num_rx_desc - 1; /* default RDT value */ #ifdef DEV_NETMAP /* preserve buffers already made available to clients */ - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_kring *kring = &na->rx_rings[0]; - int t = na->num_rx_desc - 1 - kring->nr_hwavail; - - if (t >= na->num_rx_desc) - t -= na->num_rx_desc; - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), t); - } else + if (ifp->if_capenable & IFCAP_NETMAP) + rctl -= NA(adapter->ifp)->rx_rings[0].nr_hwavail; #endif /* DEV_NETMAP */ - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), adapter->num_rx_desc - 1); + E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), rctl); return; } @@ -3434,13 +3436,8 @@ lem_rxeof(struct adapter *adapter, int count, int *done) BUS_DMASYNC_POSTREAD); #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(ifp); - na->rx_rings[0].nr_kflags |= NKR_PENDINTR; - selwakeuppri(&na->rx_rings[0].si, PI_NET); - EM_RX_UNLOCK(adapter); - return (0); - } + if (netmap_rx_irq(ifp, 0 | NETMAP_LOCKED_ENTER, &rx_sent)) + return (FALSE); #endif /* DEV_NETMAP */ if (!((current_desc->status) & E1000_RXD_STAT_DD)) { @@ -4584,6 +4581,8 @@ lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); + if (info->offset == E1000_ITR) /* units are 256ns here */ + ticks *= 4; adapter = info->adapter; diff --git a/sys/dev/e1000/if_lem.h b/sys/dev/e1000/if_lem.h index c1973e55b..235277d74 100644 --- a/sys/dev/e1000/if_lem.h +++ b/sys/dev/e1000/if_lem.h @@ -363,6 +363,7 @@ struct adapter { struct 
em_int_delay_info tx_abs_int_delay; struct em_int_delay_info rx_int_delay; struct em_int_delay_info rx_abs_int_delay; + struct em_int_delay_info tx_itr; /* * Transmit definitions diff --git a/sys/dev/ixgbe/ixgbe.c b/sys/dev/ixgbe/ixgbe.c index 3f9207190..aa95444ab 100644 --- a/sys/dev/ixgbe/ixgbe.c +++ b/sys/dev/ixgbe/ixgbe.c @@ -3573,13 +3573,8 @@ ixgbe_txeof(struct tx_ring *txr) if (!netmap_mitigate || (kring->nr_kflags < kring->nkr_num_slots && txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) { - kring->nr_kflags = kring->nkr_num_slots; - selwakeuppri(&na->tx_rings[txr->me].si, PI_NET); - IXGBE_TX_UNLOCK(txr); - IXGBE_CORE_LOCK(adapter); - selwakeuppri(&na->tx_si, PI_NET); - IXGBE_CORE_UNLOCK(adapter); - IXGBE_TX_LOCK(txr); + netmap_tx_irq(ifp, txr->me | + (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)); } return FALSE; } @@ -4364,23 +4359,9 @@ ixgbe_rxeof(struct ix_queue *que) IXGBE_RX_LOCK(rxr); #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - /* - * Same as the txeof routine: only wakeup clients on intr. - * NKR_PENDINTR in nr_kflags is used to implement interrupt - * mitigation (ixgbe_rxsync() will not look for new packets - * unless NKR_PENDINTR is set). - */ - struct netmap_adapter *na = NA(ifp); - - na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR; - selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET); - IXGBE_RX_UNLOCK(rxr); - IXGBE_CORE_LOCK(adapter); - selwakeuppri(&na->rx_si, PI_NET); - IXGBE_CORE_UNLOCK(adapter); + /* Same as the txeof routine: wakeup clients on intr. 
*/ + if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed)) return (FALSE); - } #endif /* DEV_NETMAP */ for (i = rxr->next_to_check; count != 0;) { struct mbuf *sendmp, *mp; diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h index 776f0e0ee..5bfbd3d76 100644 --- a/sys/dev/netmap/if_em_netmap.h +++ b/sys/dev/netmap/if_em_netmap.h @@ -25,7 +25,6 @@ /* * $FreeBSD$ - * $Id: if_em_netmap.h 10627 2012-02-23 19:37:15Z luigi $ * * netmap support for em. * @@ -277,7 +276,7 @@ em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) k = ring->cur; if (k > lim) return netmap_ring_reinit(kring); - + if (do_lock) EM_RX_LOCK(rxr); diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h index e817341cc..a94ca3b18 100644 --- a/sys/dev/netmap/if_igb_netmap.h +++ b/sys/dev/netmap/if_igb_netmap.h @@ -25,7 +25,6 @@ /* * $FreeBSD$ - * $Id: if_igb_netmap.h 10627 2012-02-23 19:37:15Z luigi $ * * Netmap support for igb, partly contributed by Ahmed Kooli * For details on netmap support please see ixgbe_netmap.h diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h index d3be70017..acef45f0d 100644 --- a/sys/dev/netmap/if_lem_netmap.h +++ b/sys/dev/netmap/if_lem_netmap.h @@ -26,7 +26,6 @@ /* * $FreeBSD$ - * $Id: if_lem_netmap.h 10627 2012-02-23 19:37:15Z luigi $ * * netmap support for "lem" * diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h index 1c747b715..f0f1f1969 100644 --- a/sys/dev/netmap/if_re_netmap.h +++ b/sys/dev/netmap/if_re_netmap.h @@ -25,7 +25,6 @@ /* * $FreeBSD$ - * $Id: if_re_netmap.h 10609 2012-02-22 19:44:58Z luigi $ * * netmap support for "re" * For details on netmap support please see ixgbe_netmap.h @@ -151,7 +150,7 @@ re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) /* update avail to what the kernel knows */ ring->avail = kring->nr_hwavail; - + j = kring->nr_hwcur; if (j != k) { /* we have new packets to send */ l = sc->rl_ldata.rl_tx_prodidx; 
@@ -170,7 +169,7 @@ re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) // XXX what about prodidx ? return netmap_ring_reinit(kring); } - + if (l == lim) /* mark end of ring */ cmd |= RL_TDESC_CMD_EOR; @@ -335,7 +334,7 @@ re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int do_lock) */ static void re_netmap_tx_init(struct rl_softc *sc) -{ +{ struct rl_txdesc *txd; struct rl_desc *desc; int i, n; diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h index 55660ec08..be790502c 100644 --- a/sys/dev/netmap/ixgbe_netmap.h +++ b/sys/dev/netmap/ixgbe_netmap.h @@ -25,7 +25,6 @@ /* * $FreeBSD$ - * $Id: ixgbe_netmap.h 10627 2012-02-23 19:37:15Z luigi $ * * netmap modifications for ixgbe * @@ -226,7 +225,8 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->tx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - u_int j, k = ring->cur, l, n = 0, lim = kring->nkr_num_slots - 1; + u_int j, l, n = 0; + u_int const k = ring->cur, lim = kring->nkr_num_slots - 1; /* * ixgbe can generate an interrupt on every tx packet, but it @@ -393,11 +393,10 @@ ring_reset: if (ix_use_dd) { struct ixgbe_legacy_tx_desc *txd = (struct ixgbe_legacy_tx_desc *)txr->tx_base; - + u_int k1 = netmap_idx_k2n(kring, kring->nr_hwcur); l = txr->next_to_clean; - k = netmap_idx_k2n(kring, kring->nr_hwcur); delta = 0; - while (l != k && + while (l != k1 && txd[l].upper.fields.status & IXGBE_TXD_STAT_DD) { delta++; l = (l == lim) ? 0 : l + 1; diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index 35d530390..b55c338fa 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved. + * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -81,6 +81,7 @@ __FBSDID("$FreeBSD$"); #include /* PROT_EXEC */ #include #include +#include #include /* vtophys */ #include /* vtophys */ #include /* sockaddrs */ @@ -98,6 +99,7 @@ MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map"); #include #include +/* XXX the following variables must be deprecated and included in nm_mem */ u_int netmap_total_buffers; u_int netmap_buf_size; char *netmap_buffer_base; /* address of an invalid buffer */ @@ -121,12 +123,10 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, int netmap_drop = 0; /* debugging */ int netmap_flags = 0; /* debug flags */ int netmap_fwd = 0; /* force transparent mode */ -int netmap_copy = 0; /* debugging, copy content */ SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , ""); SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , ""); SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, copy, CTLFLAG_RW, &netmap_copy, 0 , ""); #ifdef NM_BRIDGE /* support for netmap bridge */ @@ -147,22 +147,33 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, copy, CTLFLAG_RW, &netmap_copy, 0 , ""); #define NM_BDG_HASH 1024 /* forwarding table entries */ #define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */ #define NM_BRIDGES 4 /* number of bridges */ + + int netmap_bridge = NM_BDG_BATCH; /* bridge batch size */ SYSCTL_INT(_dev_netmap, OID_AUTO, bridge, CTLFLAG_RW, &netmap_bridge, 0 , ""); #ifdef linux -#define ADD_BDG_REF(ifp) (NA(ifp)->if_refcount++) -#define DROP_BDG_REF(ifp) (NA(ifp)->if_refcount-- <= 1) + +#define refcount_acquire(_a) atomic_add(1, (atomic_t *)_a) +#define refcount_release(_a) atomic_dec_and_test((atomic_t *)_a) + #else /* !linux */ -#define ADD_BDG_REF(ifp) (ifp)->if_refcount++ -#define DROP_BDG_REF(ifp) refcount_release(&(ifp)->if_refcount) + #ifdef __FreeBSD__ #include 
#include #endif /* __FreeBSD__ */ + #define prefetch(x) __builtin_prefetch(x) + #endif /* !linux */ +/* + * These are used to handle reference counters for bridge ports. + */ +#define ADD_BDG_REF(ifp) refcount_acquire(&NA(ifp)->na_bdg_refcount) +#define DROP_BDG_REF(ifp) refcount_release(&NA(ifp)->na_bdg_refcount) + static void bdg_netmap_attach(struct ifnet *ifp); static int bdg_netmap_reg(struct ifnet *ifp, int onoff); /* per-tx-queue entry */ @@ -179,9 +190,14 @@ struct nm_hash_ent { }; /* - * Interfaces for a bridge are all in ports[]. + * Interfaces for a bridge are all in bdg_ports[]. * The array has fixed size, an empty entry does not terminate - * the search. + * the search. But lookups only occur on attach/detach so we + * don't mind if they are slow. + * + * The bridge is non blocking on the transmit ports. + * + * bdg_lock protects accesses to the bdg_ports array. */ struct nm_bridge { struct ifnet *bdg_ports[NM_BDG_MAXPORTS]; @@ -297,7 +313,7 @@ netmap_update_config(struct netmap_adapter *na) txd = na->num_tx_desc; rxr = na->num_rx_rings; rxd = na->num_rx_desc; - } + } if (na->num_tx_rings == txr && na->num_tx_desc == txd && na->num_rx_rings == rxr && na->num_rx_desc == rxd) @@ -323,11 +339,7 @@ netmap_update_config(struct netmap_adapter *na) } /*------------- memory allocator -----------------*/ -#ifdef NETMAP_MEM2 #include "netmap_mem2.c" -#else /* !NETMAP_MEM2 */ -#include "netmap_mem1.c" -#endif /* !NETMAP_MEM2 */ /*------------ end of memory allocator ----------*/ @@ -497,16 +509,16 @@ netmap_dtor(void *data) { struct netmap_priv_d *priv = data; struct ifnet *ifp = priv->np_ifp; - struct netmap_adapter *na; NMA_LOCK(); if (ifp) { - na = NA(ifp); + struct netmap_adapter *na = NA(ifp); + na->nm_lock(ifp, NETMAP_REG_LOCK, 0); netmap_dtor_locked(data); na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0); - nm_if_rele(ifp); + nm_if_rele(ifp); /* might also destroy *na */ } if (priv->ref_done) { netmap_memory_deref(); @@ -1668,19 +1680,25 @@ netmap_attach(struct 
netmap_adapter *arg, int num_queues) ND("using default locks for %s", ifp->if_xname); na->nm_lock = netmap_lock_wrapper; } + #ifdef linux - if (ifp->netdev_ops) { - ND("netdev_ops %p", ifp->netdev_ops); - /* prepare a clone of the netdev ops */ - na->nm_ndo = *ifp->netdev_ops; + if (!ifp->netdev_ops) { + D("ouch, we cannot override netdev_ops"); + goto fail; } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) + /* if needed, prepare a clone of the entire netdev ops */ + na->nm_ndo = *ifp->netdev_ops; +#endif /* 2.6.28 and above */ na->nm_ndo.ndo_start_xmit = linux_netmap_start; -#endif +#endif /* linux */ + D("success for %s", ifp->if_xname); return 0; fail: D("fail, arg %p ifp %p na %p", arg, ifp, na); + netmap_detach(ifp); return (na ? EINVAL : ENOMEM); } @@ -1726,17 +1744,18 @@ netmap_start(struct ifnet *ifp, struct mbuf *m) if (netmap_verbose & NM_VERB_HOST) D("%s packet %d len %d from the stack", ifp->if_xname, kring->nr_hwcur + kring->nr_hwavail, len); + if (len > NETMAP_BUF_SIZE) { /* too long for us */ + D("%s from_host, drop packet size %d > %d", ifp->if_xname, + len, NETMAP_BUF_SIZE); + m_freem(m); + return EINVAL; + } na->nm_lock(ifp, NETMAP_CORE_LOCK, 0); if (kring->nr_hwavail >= lim) { if (netmap_verbose) D("stack ring %s full\n", ifp->if_xname); goto done; /* no space */ } - if (len > NETMAP_BUF_SIZE) { - D("%s from_host, drop packet size %d > %d", ifp->if_xname, - len, NETMAP_BUF_SIZE); - goto done; /* too long for us */ - } /* compute the insert position */ i = kring->nr_hwcur + kring->nr_hwavail; @@ -1837,6 +1856,10 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n, * N rings, separate locks: * lock(i); wake(i); unlock(i); lock(core) wake(N+1) unlock(core) * work_done is non-null on the RX path. + * + * The 'q' argument also includes flag to tell whether the queue is + * already locked on enter, and whether it should remain locked on exit. + * This helps adapting to different defaults in drivers and OSes. 
*/ int netmap_rx_irq(struct ifnet *ifp, int q, int *work_done) @@ -1844,9 +1867,14 @@ netmap_rx_irq(struct ifnet *ifp, int q, int *work_done) struct netmap_adapter *na; struct netmap_kring *r; NM_SELINFO_T *main_wq; + int locktype, unlocktype, lock; if (!(ifp->if_capenable & IFCAP_NETMAP)) return 0; + + lock = q & (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT); + q = q & NETMAP_RING_MASK; + ND(5, "received %s queue %d", work_done ? "RX" : "TX" , q); na = NA(ifp); if (na->na_flags & NAF_SKIP_INTR) { @@ -1856,32 +1884,42 @@ netmap_rx_irq(struct ifnet *ifp, int q, int *work_done) if (work_done) { /* RX path */ if (q >= na->num_rx_rings) - return 0; // regular queue + return 0; // not a physical queue r = na->rx_rings + q; r->nr_kflags |= NKR_PENDINTR; main_wq = (na->num_rx_rings > 1) ? &na->rx_si : NULL; - } else { /* tx path */ + locktype = NETMAP_RX_LOCK; + unlocktype = NETMAP_RX_UNLOCK; + } else { /* TX path */ if (q >= na->num_tx_rings) - return 0; // regular queue + return 0; // not a physical queue r = na->tx_rings + q; main_wq = (na->num_tx_rings > 1) ? 
&na->tx_si : NULL; work_done = &q; /* dummy */ + locktype = NETMAP_TX_LOCK; + unlocktype = NETMAP_TX_UNLOCK; } if (na->separate_locks) { - mtx_lock(&r->q_lock); + if (!(lock & NETMAP_LOCKED_ENTER)) + na->nm_lock(ifp, locktype, q); selwakeuppri(&r->si, PI_NET); - mtx_unlock(&r->q_lock); + na->nm_lock(ifp, unlocktype, q); if (main_wq) { - mtx_lock(&na->core_lock); + na->nm_lock(ifp, NETMAP_CORE_LOCK, 0); selwakeuppri(main_wq, PI_NET); - mtx_unlock(&na->core_lock); + na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0); } + /* lock the queue again if requested */ + if (lock & NETMAP_LOCKED_EXIT) + na->nm_lock(ifp, locktype, q); } else { - mtx_lock(&na->core_lock); + if (!(lock & NETMAP_LOCKED_ENTER)) + na->nm_lock(ifp, NETMAP_CORE_LOCK, 0); selwakeuppri(&r->si, PI_NET); if (main_wq) selwakeuppri(main_wq, PI_NET); - mtx_unlock(&na->core_lock); + if (!(lock & NETMAP_LOCKED_EXIT)) + na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0); } *work_done = 1; /* do not fire napi again */ return 1; @@ -1902,7 +1940,9 @@ netmap_rx_irq(struct ifnet *ifp, int q, int *work_done) static u_int linux_netmap_poll(struct file * file, struct poll_table_struct *pwait) { -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + int events = POLLIN | POLLOUT; /* XXX maybe... */ +#elif LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) int events = pwait ? pwait->key : POLLIN | POLLOUT; #else /* in 3.4.0 field 'key' was renamed to '_key' */ int events = pwait ? pwait->_key : POLLIN | POLLOUT; @@ -1942,7 +1982,7 @@ linux_netmap_mmap(struct file *f, struct vm_area_struct *vma) * vtophys mapping in lut[k] so we use that, scanning * the lut[] array in steps of clustentries, * and we map each cluster (not individual pages, - * it would be overkill). + * it would be overkill -- XXX slow ? 20130415). 
*/ /* diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index 86a26fbee..7ab617e72 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved. + * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -25,7 +25,6 @@ /* * $FreeBSD$ - * $Id: netmap_kern.h 11829 2012-09-26 04:06:34Z luigi $ * * The header contains the definitions of constants and function * prototypes used only in kernelspace. @@ -34,9 +33,8 @@ #ifndef _NET_NETMAP_KERN_H_ #define _NET_NETMAP_KERN_H_ -#define NETMAP_MEM2 // use the new memory allocator - #if defined(__FreeBSD__) + #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) @@ -44,8 +42,10 @@ #define NM_SELINFO_T struct selinfo #define MBUF_LEN(m) ((m)->m_pkthdr.len) #define NM_SEND_UP(ifp, m) ((ifp)->if_input)(ifp, m) + #elif defined (linux) -#define NM_LOCK_T spinlock_t + +#define NM_LOCK_T safe_spinlock_t // see bsd_glue.h #define NM_SELINFO_T wait_queue_head_t #define MBUF_LEN(m) ((m)->len) #define NM_SEND_UP(ifp, m) netif_rx(m) @@ -67,6 +67,7 @@ #endif #elif defined (__APPLE__) + #warning apple support is incomplete. #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) @@ -76,8 +77,10 @@ #define NM_SEND_UP(ifp, m) ((ifp)->if_input)(ifp, m) #else + #error unsupported platform -#endif + +#endif /* end - platform-specific code */ #define ND(format, ...) #define D(format, ...) 
\ @@ -207,10 +210,20 @@ struct netmap_adapter { int (*nm_config)(struct ifnet *, u_int *txr, u_int *txd, u_int *rxr, u_int *rxd); + /* + * Bridge support: + * + * bdg_port is the port number used in the bridge; + * na_bdg_refcount is a refcount used for bridge ports, + * when it goes to 0 we can detach+free this port + * (a bridge port is always attached if it exists; + * it is not always registered) + */ int bdg_port; + int na_bdg_refcount; + #ifdef linux struct net_device_ops nm_ndo; - int if_refcount; // XXX additions for bridge #endif /* linux */ }; @@ -245,6 +258,10 @@ enum { #endif }; +/* How to handle locking support in netmap_rx_irq/netmap_tx_irq */ +#define NETMAP_LOCKED_ENTER 0x10000000 /* already locked on enter */ +#define NETMAP_LOCKED_EXIT 0x20000000 /* keep locked on exit */ + /* * The following are support routines used by individual drivers to * support netmap operation. @@ -272,7 +289,7 @@ struct netmap_slot *netmap_reset(struct netmap_adapter *na, int netmap_ring_reinit(struct netmap_kring *); extern u_int netmap_buf_size; -#define NETMAP_BUF_SIZE netmap_buf_size +#define NETMAP_BUF_SIZE netmap_buf_size // XXX remove extern int netmap_mitigate; extern int netmap_no_pendintr; extern u_int netmap_total_buffers; @@ -431,20 +448,16 @@ netmap_idx_k2n(struct netmap_kring *kr, int idx) } -#ifdef NETMAP_MEM2 /* Entries of the look-up table. */ struct lut_entry { void *vaddr; /* virtual address. */ - vm_paddr_t paddr; /* phisical address. */ + vm_paddr_t paddr; /* physical address. 
*/ }; struct netmap_obj_pool; extern struct lut_entry *netmap_buffer_lut; #define NMB_VA(i) (netmap_buffer_lut[i].vaddr) #define NMB_PA(i) (netmap_buffer_lut[i].paddr) -#else /* NETMAP_MEM1 */ -#define NMB_VA(i) (netmap_buffer_base + (i * NETMAP_BUF_SIZE) ) -#endif /* NETMAP_MEM2 */ /* * NMB return the virtual address of a buffer (buffer 0 on bad index) @@ -462,11 +475,8 @@ PNMB(struct netmap_slot *slot, uint64_t *pp) { uint32_t i = slot->buf_idx; void *ret = (i >= netmap_total_buffers) ? NMB_VA(0) : NMB_VA(i); -#ifdef NETMAP_MEM2 + *pp = (i >= netmap_total_buffers) ? NMB_PA(0) : NMB_PA(i); -#else - *pp = vtophys(ret); -#endif return ret; } @@ -474,5 +484,4 @@ PNMB(struct netmap_slot *slot, uint64_t *pp) int netmap_rx_irq(struct ifnet *, int, int *); #define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL) -extern int netmap_copy; #endif /* _NET_NETMAP_KERN_H_ */ diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c index 03a52b60d..dcf4b06d8 100644 --- a/sys/dev/netmap/netmap_mem2.c +++ b/sys/dev/netmap/netmap_mem2.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved. + * Copyright (C) 2012-2013 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -25,22 +25,23 @@ /* * $FreeBSD$ - * $Id: netmap_mem2.c 11881 2012-10-18 23:24:15Z luigi $ * * (New) memory allocator for netmap */ /* - * This allocator creates three memory regions: + * This allocator creates three memory pools: * nm_if_pool for the struct netmap_if * nm_ring_pool for the struct netmap_ring * nm_buf_pool for the packet buffers. * - * All regions need to be multiple of a page size as we export them to - * userspace through mmap. Only the latter needs to be dma-able, + * that contain netmap objects. 
Each pool is made of a number of clusters, + * multiple of a page size, each containing an integer number of objects. + * The clusters are contiguous in user space but not in the kernel. + * Only nm_buf_pool needs to be dma-able, * but for convenience use the same type of allocator for all. * - * Once mapped, the three regions are exported to userspace + * Once mapped, the three pools are exported to userspace * as a contiguous block, starting from nm_if_pool. Each * cluster (and pool) is an integral number of pages. * [ . . . ][ . . . . . .][ . . . . . . . . . .] @@ -56,7 +57,7 @@ * The pool is split into smaller clusters, whose size is a * multiple of the page size. The cluster size is chosen * to minimize the waste for a given max cluster size - * (we do it by brute force, as we have relatively few object + * (we do it by brute force, as we have relatively few objects * per cluster). * * Objects are aligned to the cache line (64 bytes) rounding up object @@ -80,7 +81,7 @@ * In the worst case we have one netmap_if per ring in the system. * * struct netmap_ring - * variable too, 8 byte per slot plus some fixed amount. + * variable size, 8 byte per slot plus some fixed amount. * Rings can be large (e.g. 4k slots, or >32Kbytes). * We default to 36 KB (9 pages), and a few hundred rings. * @@ -93,16 +94,14 @@ * the size to multiple of 1K or so. 
Default to 2K */ -#ifndef CONSERVATIVE #define NETMAP_BUF_MAX_NUM 20*4096*2 /* large machine */ -#else /* CONSERVATIVE */ -#define NETMAP_BUF_MAX_NUM 20000 /* 40MB */ -#endif #ifdef linux +// XXX a mtx would suffice here 20130415 lr +// #define NMA_LOCK_T safe_spinlock_t #define NMA_LOCK_T struct semaphore #define NMA_LOCK_INIT() sema_init(&nm_mem.nm_mtx, 1) -#define NMA_LOCK_DESTROY() +#define NMA_LOCK_DESTROY() #define NMA_LOCK() down(&nm_mem.nm_mtx) #define NMA_UNLOCK() up(&nm_mem.nm_mtx) #else /* !linux */ @@ -178,7 +177,11 @@ struct netmap_mem_d { struct netmap_obj_pool pools[NETMAP_POOLS_NR]; }; - +/* + * nm_mem is the memory allocator used for all physical interfaces + * running in netmap mode. + * Virtual (VALE) ports will have each its own allocator. + */ static struct netmap_mem_d nm_mem = { /* Our memory allocator. */ .pools = { [NETMAP_IF_POOL] = { @@ -205,6 +208,7 @@ static struct netmap_mem_d nm_mem = { /* Our memory allocator. */ }, }; +// XXX logically belongs to nm_mem struct lut_entry *netmap_buffer_lut; /* exported */ /* memory allocator related sysctls */ @@ -212,12 +216,10 @@ struct lut_entry *netmap_buffer_lut; /* exported */ #define STRINGIFY(x) #x #define DECLARE_SYSCTLS(id, name) \ - /* TUNABLE_INT("hw.netmap." STRINGIFY(name) "_size", &netmap_params[id].size); */ \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \ CTLFLAG_RW, &netmap_params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \ CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \ - /* TUNABLE_INT("hw.netmap." 
STRINGIFY(name) "_num", &netmap_params[id].num); */ \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \ CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \ @@ -228,14 +230,12 @@ DECLARE_SYSCTLS(NETMAP_RING_POOL, ring); DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf); /* - * Convert a userspace offset to a phisical address. - * XXX re-do in a simpler way. + * Convert a userspace offset to a physical address. + * XXX only called from FreeBSD's netmap_mmap(), + * because on Linux we map everything at once. * - * The idea here is to hide userspace applications the fact that pre-allocated - * memory is not contiguous, but fragmented across different clusters and - * smaller memory allocators. Consequently, first of all we need to find which - * allocator is owning provided offset, then we need to find out the physical - * address associated to target page (this is done using the look-up table. + * First, find the allocator that contains the requested offset, + * then locate the cluster through a lookup table.
*/ static inline vm_paddr_t netmap_ofstophys(vm_offset_t offset) @@ -247,7 +247,7 @@ netmap_ofstophys(vm_offset_t offset) for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i]._memtotal, i++) { if (offset >= p[i]._memtotal) continue; - // XXX now scan the clusters + // now lookup the cluster's address return p[i].lut[offset / p[i]._objsize].paddr + offset % p[i]._objsize; } @@ -278,7 +278,7 @@ netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr) const char *base = p->lut[i].vaddr; ssize_t relofs = (const char *) vaddr - base; - if (relofs < 0 || relofs > p->_clustsize) + if (relofs < 0 || relofs >= p->_clustsize) continue; ofs = ofs + relofs; @@ -296,12 +296,12 @@ netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr) netmap_obj_offset(&nm_mem.pools[NETMAP_IF_POOL], (v)) #define netmap_ring_offset(v) \ - (nm_mem.pools[NETMAP_IF_POOL]._memtotal + \ + (nm_mem.pools[NETMAP_IF_POOL]._memtotal + \ netmap_obj_offset(&nm_mem.pools[NETMAP_RING_POOL], (v))) #define netmap_buf_offset(v) \ - (nm_mem.pools[NETMAP_IF_POOL]._memtotal + \ - nm_mem.pools[NETMAP_RING_POOL]._memtotal + \ + (nm_mem.pools[NETMAP_IF_POOL]._memtotal + \ + nm_mem.pools[NETMAP_RING_POOL]._memtotal + \ netmap_obj_offset(&nm_mem.pools[NETMAP_BUF_POOL], (v))) @@ -356,7 +356,8 @@ netmap_obj_malloc(struct netmap_obj_pool *p, int len, uint32_t *start, uint32_t /* - * free by index, not by address + * free by index, not by address. This is slow, but is only used + * for a small number of objects (rings, nifp) */ static void netmap_obj_free(struct netmap_obj_pool *p, uint32_t j) @@ -380,7 +381,7 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr) ssize_t relofs = (ssize_t) vaddr - (ssize_t) base; /* Given address, is out of the scope of the current cluster.*/ - if (vaddr < base || relofs > p->_clustsize) + if (vaddr < base || relofs >= p->_clustsize) continue; j = j + relofs / p->_objsize; @@ -428,7 +429,7 @@ netmap_new_bufs(struct netmap_if *nifp, * in the NIC ring. 
This is a hack that hides missing * initializations in the drivers, and should go away. */ - slot[i].flags = NS_BUF_CHANGED; + // slot[i].flags = NS_BUF_CHANGED; } ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos); @@ -526,12 +527,12 @@ netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int obj objsize += LINE_ROUND - i; } if (objsize < p->objminsize || objsize > p->objmaxsize) { - D("requested objsize %d out of range [%d, %d]", + D("requested objsize %d out of range [%d, %d]", objsize, p->objminsize, p->objmaxsize); goto error; } if (objtotal < p->nummin || objtotal > p->nummax) { - D("requested objtotal %d out of range [%d, %d]", + D("requested objtotal %d out of range [%d, %d]", objtotal, p->nummin, p->nummax); goto error; } @@ -683,7 +684,6 @@ netmap_memory_config(void) { int i; - if (!netmap_memory_config_changed()) goto out; @@ -693,7 +693,7 @@ netmap_memory_config(void) /* reset previous allocation */ for (i = 0; i < NETMAP_POOLS_NR; i++) { netmap_reset_obj_allocator(&nm_mem.pools[i]); - } + } nm_mem.finalized = 0; } diff --git a/sys/dev/re/if_re.c b/sys/dev/re/if_re.c index 6d42e422f..9cc557729 100644 --- a/sys/dev/re/if_re.c +++ b/sys/dev/re/if_re.c @@ -1753,8 +1753,12 @@ re_detach(device_t dev) bus_teardown_intr(dev, sc->rl_irq[0], sc->rl_intrhand[0]); sc->rl_intrhand[0] = NULL; } - if (ifp != NULL) + if (ifp != NULL) { +#ifdef DEV_NETMAP + netmap_detach(ifp); +#endif /* DEV_NETMAP */ if_free(ifp); + } if ((sc->rl_flags & (RL_FLAG_MSI | RL_FLAG_MSIX)) == 0) rid = 0; else @@ -1843,9 +1847,6 @@ re_detach(device_t dev) bus_dma_tag_destroy(sc->rl_ldata.rl_stag); } -#ifdef DEV_NETMAP - netmap_detach(ifp); -#endif /* DEV_NETMAP */ if (sc->rl_parent_tag) bus_dma_tag_destroy(sc->rl_parent_tag); @@ -2110,11 +2111,9 @@ re_rxeof(struct rl_softc *sc, int *rx_npktsp) ifp = sc->rl_ifp; #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - NA(ifp)->rx_rings[0].nr_kflags |= NKR_PENDINTR; - 
selwakeuppri(&NA(ifp)->rx_rings[0].si, PI_NET); + if (netmap_rx_irq(ifp, 0 | (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT), + &rx_npkts)) return 0; - } #endif /* DEV_NETMAP */ if (ifp->if_mtu > RL_MTU && (sc->rl_flags & RL_FLAG_JUMBOV2) != 0) jumbo = 1; @@ -2358,10 +2357,8 @@ re_txeof(struct rl_softc *sc) ifp = sc->rl_ifp; #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - selwakeuppri(&NA(ifp)->tx_rings[0].si, PI_NET); + if (netmap_tx_irq(ifp, 0 | (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT))) return; - } #endif /* DEV_NETMAP */ /* Invalidate the TX descriptor list */ bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag, diff --git a/sys/net/netmap.h b/sys/net/netmap.h index 638cb1826..cdeb10eea 100644 --- a/sys/net/netmap.h +++ b/sys/net/netmap.h @@ -32,7 +32,6 @@ /* * $FreeBSD$ - * $Id: netmap.h 11997 2013-01-17 21:59:12Z luigi $ * * Definitions of constants and the structures used by the netmap * framework, for the part visible to both kernel and userspace. diff --git a/sys/net/netmap_user.h b/sys/net/netmap_user.h index cefbec773..75f211e33 100644 --- a/sys/net/netmap_user.h +++ b/sys/net/netmap_user.h @@ -32,7 +32,6 @@ /* * $FreeBSD$ - * $Id: netmap_user.h 10597 2012-02-21 05:08:32Z luigi $ * * This header contains the macros used to manipulate netmap structures * and packets in userspace. See netmap(4) for more information. diff --git a/tools/tools/netmap/Makefile b/tools/tools/netmap/Makefile index 4b682e52a..2593a2717 100644 --- a/tools/tools/netmap/Makefile +++ b/tools/tools/netmap/Makefile @@ -5,7 +5,7 @@ # we can just define 'progs' and create custom targets. 
PROGS = pkt-gen bridge testpcap libnetmap.so -CLEANFILES = $(PROGS) pcap.o +CLEANFILES = $(PROGS) pcap.o nm_util.o NO_MAN= CFLAGS += -Werror -Wall -nostdinc -I/usr/include -I../../../sys CFLAGS += -Wextra @@ -17,9 +17,12 @@ LDFLAGS += -lpthread -lpcap all: $(PROGS) +pkt-gen bridge: nm_util.o + $(CC) $(CFLAGS) -o ${.TARGET} ${.TARGET:=.c} nm_util.o $(LDFLAGS) + testpcap: pcap.c libnetmap.so - $(CC) $(CFLAGS) -L. -lnetmap -o ${.TARGET} pcap.c + $(CC) $(CFLAGS) -DTEST -L. -lnetmap -o ${.TARGET} pcap.c -libnetmap.so: pcap.c +libnetmap.so: pcap.c nm_util.c $(CC) $(CFLAGS) -fpic -c ${.ALLSRC} $(CC) -shared -o ${.TARGET} ${.ALLSRC:.c=.o} diff --git a/tools/tools/netmap/README b/tools/tools/netmap/README index 35207904e..2bde6f2ab 100644 --- a/tools/tools/netmap/README +++ b/tools/tools/netmap/README @@ -21,3 +21,4 @@ BSD netmap 0.77 3.82 ports/trafshow (version 5) 0.94 7.7 net-mgmt/ipcad (ip accounting daemon) 0.9 5.0 net-mgmt/darkstat (ip accounting + graphing) + 0.83 2.45 net-mgmt/iftop (curses traffic display) diff --git a/tools/tools/netmap/bridge.c b/tools/tools/netmap/bridge.c index 0e9e44216..473ee0c70 100644 --- a/tools/tools/netmap/bridge.c +++ b/tools/tools/netmap/bridge.c @@ -9,195 +9,24 @@ * $FreeBSD$ */ -#include -#include /* signal */ -#include -#include -#include /* strcmp */ -#include /* open */ -#include /* close */ +#include "nm_util.h" -#include /* le64toh */ -#include /* PROT_* */ -#include /* ioctl */ -#include -#include -#include /* sockaddr.. */ -#include /* ntohs */ - -#include /* ifreq */ -#include -#include -#include - -#include /* sockaddr_in */ - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) int verbose = 0; -/* debug support */ -#define ND(format, ...) {} -#define D(format, ...) 
do { \ - if (!verbose) break; \ - struct timeval _xxts; \ - gettimeofday(&_xxts, NULL); \ - fprintf(stderr, "%03d.%06d %s [%d] " format "\n", \ - (int)_xxts.tv_sec %1000, (int)_xxts.tv_usec, \ - __FUNCTION__, __LINE__, ##__VA_ARGS__); \ - } while (0) - - -char *version = "$Id: bridge.c 10857 2012-04-06 12:18:22Z luigi $"; +char *version = "$Id: bridge.c 12016 2013-01-23 17:24:22Z luigi $"; static int do_abort = 0; -/* - * info on a ring we handle - */ -struct my_ring { - const char *ifname; - int fd; - char *mem; /* userspace mmap address */ - u_int memsize; - u_int queueid; - u_int begin, end; /* first..last+1 rings to check */ - struct netmap_if *nifp; - struct netmap_ring *tx, *rx; /* shortcuts */ - - uint32_t if_flags; - uint32_t if_reqcap; - uint32_t if_curcap; -}; - static void -sigint_h(__unused int sig) +sigint_h(int sig) { + (void)sig; /* UNUSED */ do_abort = 1; signal(SIGINT, SIG_DFL); } -static int -do_ioctl(struct my_ring *me, unsigned long what) -{ - struct ifreq ifr; - int error; - - bzero(&ifr, sizeof(ifr)); - strncpy(ifr.ifr_name, me->ifname, sizeof(ifr.ifr_name)); - switch (what) { - case SIOCSIFFLAGS: - ifr.ifr_flagshigh = me->if_flags >> 16; - ifr.ifr_flags = me->if_flags & 0xffff; - break; - case SIOCSIFCAP: - ifr.ifr_reqcap = me->if_reqcap; - ifr.ifr_curcap = me->if_curcap; - break; - } - error = ioctl(me->fd, what, &ifr); - if (error) { - D("ioctl error 0x%lx", what); - return error; - } - switch (what) { - case SIOCGIFFLAGS: - me->if_flags = (ifr.ifr_flagshigh << 16) | - (0xffff & ifr.ifr_flags); - if (verbose) - D("flags are 0x%x", me->if_flags); - break; - - case SIOCGIFCAP: - me->if_reqcap = ifr.ifr_reqcap; - me->if_curcap = ifr.ifr_curcap; - if (verbose) - D("curcap are 0x%x", me->if_curcap); - break; - } - return 0; -} - -/* - * open a device. if me->mem is null then do an mmap. 
- */ -static int -netmap_open(struct my_ring *me, int ringid) -{ - int fd, err, l; - struct nmreq req; - - me->fd = fd = open("/dev/netmap", O_RDWR); - if (fd < 0) { - D("Unable to open /dev/netmap"); - return (-1); - } - bzero(&req, sizeof(req)); - strncpy(req.nr_name, me->ifname, sizeof(req.nr_name)); - req.nr_ringid = ringid; - req.nr_version = NETMAP_API; - err = ioctl(fd, NIOCGINFO, &req); - if (err) { - D("cannot get info on %s", me->ifname); - goto error; - } - me->memsize = l = req.nr_memsize; - if (verbose) - D("memsize is %d MB", l>>20); - err = ioctl(fd, NIOCREGIF, &req); - if (err) { - D("Unable to register %s", me->ifname); - goto error; - } - - if (me->mem == NULL) { - me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); - if (me->mem == MAP_FAILED) { - D("Unable to mmap"); - me->mem = NULL; - goto error; - } - } - - me->nifp = NETMAP_IF(me->mem, req.nr_offset); - me->queueid = ringid; - if (ringid & NETMAP_SW_RING) { - me->begin = req.nr_rx_rings; - me->end = me->begin + 1; - me->tx = NETMAP_TXRING(me->nifp, req.nr_tx_rings); - me->rx = NETMAP_RXRING(me->nifp, req.nr_rx_rings); - } else if (ringid & NETMAP_HW_RING) { - D("XXX check multiple threads"); - me->begin = ringid & NETMAP_RING_MASK; - me->end = me->begin + 1; - me->tx = NETMAP_TXRING(me->nifp, me->begin); - me->rx = NETMAP_RXRING(me->nifp, me->begin); - } else { - me->begin = 0; - me->end = req.nr_rx_rings; // XXX max of the two - me->tx = NETMAP_TXRING(me->nifp, 0); - me->rx = NETMAP_RXRING(me->nifp, 0); - } - return (0); -error: - close(me->fd); - return -1; -} - - -static int -netmap_close(struct my_ring *me) -{ - D(""); - if (me->mem) - munmap(me->mem, me->memsize); - ioctl(me->fd, NIOCUNREGIF, NULL); - close(me->fd); - return (0); -} - - /* * move up to 'limit' pkts from rxring to txring swapping buffers. 
*/ @@ -237,7 +66,7 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring, if (rs->len < 14 || rs->len > 2048) D("wrong len %d rx[%d] -> tx[%d]", rs->len, j, k); else if (verbose > 1) - D("send len %d rx[%d] -> tx[%d]", rs->len, j, k); + D("%s send len %d rx[%d] -> tx[%d]", msg, rs->len, j, k); ts->len = rs->len; /* report the buffer change. */ @@ -251,7 +80,7 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring, rxring->cur = j; txring->cur = k; if (verbose && m > 0) - D("sent %d packets to %p", m, txring); + D("%s sent %d packets to %p", msg, m, txring); return (m); } @@ -287,7 +116,7 @@ move(struct my_ring *src, struct my_ring *dst, u_int limit) * how many packets on this set of queues ? */ static int -howmany(struct my_ring *me, int tx) +pkt_queued(struct my_ring *me, int tx) { u_int i, tot = 0; @@ -337,6 +166,7 @@ main(int argc, char **argv) while ( (ch = getopt(argc, argv, "b:i:vw:")) != -1) { switch (ch) { + default: D("bad option %c %s", ch, optarg); usage(); break; @@ -361,6 +191,7 @@ main(int argc, char **argv) } } + argc -= optind; argv += optind; @@ -394,44 +225,12 @@ main(int argc, char **argv) /* two different interfaces. Take all rings on if1 */ i = 0; // all hw rings } - if (netmap_open(me, i)) + if (netmap_open(me, i, 1)) return (1); me[1].mem = me[0].mem; /* copy the pointer, so only one mmap */ - if (netmap_open(me+1, 0)) + if (netmap_open(me+1, 0, 1)) return (1); - /* if bridging two interfaces, set promisc mode */ - if (i != NETMAP_SW_RING) { - do_ioctl(me, SIOCGIFFLAGS); - if ((me[0].if_flags & IFF_UP) == 0) { - D("%s is down, bringing up...", me[0].ifname); - me[0].if_flags |= IFF_UP; - } - me[0].if_flags |= IFF_PPROMISC; - do_ioctl(me, SIOCSIFFLAGS); - - do_ioctl(me+1, SIOCGIFFLAGS); - me[1].if_flags |= IFF_PPROMISC; - do_ioctl(me+1, SIOCSIFFLAGS); - - /* also disable checksums etc. 
*/ - do_ioctl(me, SIOCGIFCAP); - me[0].if_reqcap = me[0].if_curcap; - me[0].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE); - do_ioctl(me+0, SIOCSIFCAP); - } - do_ioctl(me+1, SIOCGIFFLAGS); - if ((me[1].if_flags & IFF_UP) == 0) { - D("%s is down, bringing up...", me[1].ifname); - me[1].if_flags |= IFF_UP; - } - do_ioctl(me+1, SIOCSIFFLAGS); - - do_ioctl(me+1, SIOCGIFCAP); - me[1].if_reqcap = me[1].if_curcap; - me[1].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE); - do_ioctl(me+1, SIOCSIFCAP); - /* setup poll(2) variables. */ memset(pollfd, 0, sizeof(pollfd)); for (i = 0; i < 2; i++) { @@ -451,8 +250,8 @@ main(int argc, char **argv) int n0, n1, ret; pollfd[0].events = pollfd[1].events = 0; pollfd[0].revents = pollfd[1].revents = 0; - n0 = howmany(me, 0); - n1 = howmany(me + 1, 0); + n0 = pkt_queued(me, 0); + n1 = pkt_queued(me + 1, 0); if (n0) pollfd[1].events |= POLLOUT; else @@ -468,14 +267,14 @@ main(int argc, char **argv) ret <= 0 ? "timeout" : "ok", pollfd[0].events, pollfd[0].revents, - howmany(me, 0), + pkt_queued(me, 0), me[0].rx->cur, - howmany(me, 1), + pkt_queued(me, 1), pollfd[1].events, pollfd[1].revents, - howmany(me+1, 0), + pkt_queued(me+1, 0), me[1].rx->cur, - howmany(me+1, 1) + pkt_queued(me+1, 1) ); if (ret < 0) continue; diff --git a/tools/tools/netmap/nm_util.c b/tools/tools/netmap/nm_util.c new file mode 100644 index 000000000..2b2c0ca3f --- /dev/null +++ b/tools/tools/netmap/nm_util.c @@ -0,0 +1,251 @@ +/* + * Copyright (C) 2012 Luigi Rizzo. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $FreeBSD$ + * $Id$ + * + * utilities to use netmap devices. 
+ * This does the basic functions of opening a device and issuing + * ioctls() + */ + +#include "nm_util.h" + +extern int verbose; + +int +nm_do_ioctl(struct my_ring *me, u_long what, int subcmd) +{ + struct ifreq ifr; + int error; +#if defined( __FreeBSD__ ) || defined (__APPLE__) + int fd = me->fd; +#endif +#ifdef linux + struct ethtool_value eval; + int fd; + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) { + printf("Error: cannot get device control socket.\n"); + return -1; + } +#endif /* linux */ + + (void)subcmd; // unused + bzero(&ifr, sizeof(ifr)); + strncpy(ifr.ifr_name, me->ifname, sizeof(ifr.ifr_name)); + switch (what) { + case SIOCSIFFLAGS: +#ifndef __APPLE__ + ifr.ifr_flagshigh = me->if_flags >> 16; +#endif + ifr.ifr_flags = me->if_flags & 0xffff; + break; + +#if defined( __FreeBSD__ ) + case SIOCSIFCAP: + ifr.ifr_reqcap = me->if_reqcap; + ifr.ifr_curcap = me->if_curcap; + break; +#endif +#ifdef linux + case SIOCETHTOOL: + eval.cmd = subcmd; + eval.data = 0; + ifr.ifr_data = (caddr_t)&eval; + break; +#endif /* linux */ + } + error = ioctl(fd, what, &ifr); + if (error) + goto done; + switch (what) { + case SIOCGIFFLAGS: +#ifndef __APPLE__ + me->if_flags = (ifr.ifr_flagshigh << 16) | + (0xffff & ifr.ifr_flags); +#endif + if (verbose) + D("flags are 0x%x", me->if_flags); + break; + +#if defined( __FreeBSD__ ) + case SIOCGIFCAP: + me->if_reqcap = ifr.ifr_reqcap; + me->if_curcap = ifr.ifr_curcap; + if (verbose) + D("curcap are 0x%x", me->if_curcap); + break; +#endif /* __FreeBSD__ */ + } +done: +#ifdef linux + close(fd); +#endif + if (error) + D("ioctl error %d %lu", error, what); + return error; +} + +/* + * open a device. if me->mem is null then do an mmap. + * Returns 0 on success, -1 on error; the descriptor is stored in me->fd. + * If promisc is non-zero (and ringid != NETMAP_SW_RING) set promisc mode.
+ */ +int +netmap_open(struct my_ring *me, int ringid, int promisc) +{ + int fd, err, l; + struct nmreq req; + + me->fd = fd = open("/dev/netmap", O_RDWR); + if (fd < 0) { + D("Unable to open /dev/netmap"); + return (-1); + } + bzero(&req, sizeof(req)); + req.nr_version = NETMAP_API; + strncpy(req.nr_name, me->ifname, sizeof(req.nr_name)); + req.nr_ringid = ringid; + err = ioctl(fd, NIOCGINFO, &req); + if (err) { + D("cannot get info on %s, errno %d ver %d", + me->ifname, errno, req.nr_version); + goto error; + } + me->memsize = l = req.nr_memsize; + if (verbose) + D("memsize is %d MB", l>>20); + err = ioctl(fd, NIOCREGIF, &req); + if (err) { + D("Unable to register %s", me->ifname); + goto error; + } + + if (me->mem == NULL) { + me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); + if (me->mem == MAP_FAILED) { + D("Unable to mmap"); + me->mem = NULL; + goto error; + } + } + + + /* Set the operating mode. */ + if (ringid != NETMAP_SW_RING) { + nm_do_ioctl(me, SIOCGIFFLAGS, 0); + if ((me[0].if_flags & IFF_UP) == 0) { + D("%s is down, bringing up...", me[0].ifname); + me[0].if_flags |= IFF_UP; + } + if (promisc) { + me[0].if_flags |= IFF_PPROMISC; + nm_do_ioctl(me, SIOCSIFFLAGS, 0); + } + +#ifdef __FreeBSD__ + /* also disable checksums etc. */ + nm_do_ioctl(me, SIOCGIFCAP, 0); + me[0].if_reqcap = me[0].if_curcap; + me[0].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE); + nm_do_ioctl(me+0, SIOCSIFCAP, 0); +#endif +#ifdef linux + /* disable: + * - generic-segmentation-offload + * - tcp-segmentation-offload + * - rx-checksumming + * - tx-checksumming + * XXX check how to set back the caps. 
+ */ + nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_SGSO); + nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_STSO); + nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_SRXCSUM); + nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_STXCSUM); +#endif /* linux */ + } + + me->nifp = NETMAP_IF(me->mem, req.nr_offset); + me->queueid = ringid; + if (ringid & NETMAP_SW_RING) { + me->begin = req.nr_rx_rings; + me->end = me->begin + 1; + me->tx = NETMAP_TXRING(me->nifp, req.nr_tx_rings); + me->rx = NETMAP_RXRING(me->nifp, req.nr_rx_rings); + } else if (ringid & NETMAP_HW_RING) { + D("XXX check multiple threads"); + me->begin = ringid & NETMAP_RING_MASK; + me->end = me->begin + 1; + me->tx = NETMAP_TXRING(me->nifp, me->begin); + me->rx = NETMAP_RXRING(me->nifp, me->begin); + } else { + me->begin = 0; + me->end = req.nr_rx_rings; // XXX max of the two + me->tx = NETMAP_TXRING(me->nifp, 0); + me->rx = NETMAP_RXRING(me->nifp, 0); + } + return (0); +error: + close(me->fd); + return -1; +} + + +int +netmap_close(struct my_ring *me) +{ + D(""); + if (me->mem) + munmap(me->mem, me->memsize); + ioctl(me->fd, NIOCUNREGIF, NULL); + close(me->fd); + return (0); +} + + +/* + * how many packets on this set of queues ? + */ +int +pkt_queued(struct my_ring *me, int tx) +{ + u_int i, tot = 0; + + ND("me %p begin %d end %d", me, me->begin, me->end); + for (i = me->begin; i < me->end; i++) { + struct netmap_ring *ring = tx ? + NETMAP_TXRING(me->nifp, i) : NETMAP_RXRING(me->nifp, i); + tot += ring->avail; + } + if (0 && verbose && tot && !tx) + D("ring %s %s %s has %d avail at %d", + me->ifname, tx ? "tx": "rx", + me->end >= me->nifp->ni_tx_rings ? // XXX who comes first ? + "host":"net", + tot, NETMAP_TXRING(me->nifp, me->begin)->cur); + return tot; +} diff --git a/tools/tools/netmap/nm_util.h b/tools/tools/netmap/nm_util.h new file mode 100644 index 000000000..0d64f131f --- /dev/null +++ b/tools/tools/netmap/nm_util.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2012 Luigi Rizzo. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $FreeBSD$ + * $Id$ + * + * Some utilities to build netmap-based programs. + */ + +#ifndef _NM_UTIL_H +#define _NM_UTIL_H +#include +#include /* signal */ +#include +#include +#include /* PRI* macros */ +#include /* strcmp */ +#include /* open */ +#include /* close */ +#include /* getifaddrs */ + +#include /* PROT_* */ +#include /* ioctl */ +#include +#include /* sockaddr.. 
*/ +#include /* ntohs */ +#include +#include /* sysctl */ +#include /* timersub */ + +#include +#include /* ifreq */ + +#include +#include +#include + +#include +#include + +#ifndef MY_PCAP /* use the system's pcap if available */ + +#ifdef NO_PCAP +#define PCAP_ERRBUF_SIZE 512 +typedef void pcap_t; +struct pcap_pkthdr; +#define pcap_inject(a,b,c) ((void)a, (void)b, (void)c, -1) +#define pcap_dispatch(a, b, c, d) (void)c +#define pcap_open_live(a, b, c, d, e) ((void)e, NULL) +#else /* !NO_PCAP */ +#include // XXX do we need it ? +#endif /* !NO_PCAP */ + +#endif // XXX hack + +#include /* pthread_* */ + +#ifdef linux +#define ifr_flagshigh ifr_flags +#define ifr_curcap ifr_flags +#define ifr_reqcap ifr_flags +#define IFF_PPROMISC IFF_PROMISC +#include +#include + +#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME +#include /* ether_aton */ +#include /* sockaddr_ll */ +#endif /* linux */ + +#ifdef __FreeBSD__ +#include /* le64toh */ +#include + +#include /* pthread w/ affinity */ +#include /* cpu_set */ +#include /* LLADDR */ +#endif /* __FreeBSD__ */ + +#ifdef __APPLE__ +#define ifr_flagshigh ifr_flags // XXX +#define IFF_PPROMISC IFF_PROMISC +#include /* LLADDR */ +#define clock_gettime(a,b) \ + do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) +#endif /* __APPLE__ */ + +static inline int min(int a, int b) { return a < b ? a : b; } +extern int time_second; + +/* debug support */ +#define ND(format, ...) do {} while(0) +#define D(format, ...) \ + fprintf(stderr, "%s [%d] " format "\n", \ + __FUNCTION__, __LINE__, ##__VA_ARGS__) + +#define RD(lps, format, ...) \ + do { \ + static int t0, cnt; \ + if (t0 != time_second) { \ + t0 = time_second; \ + cnt = 0; \ + } \ + if (cnt++ < lps) \ + D(format, ##__VA_ARGS__); \ + } while (0) + + + +// XXX does it work on 32-bit machines ? +static inline void prefetch (const void *x) +{ + __asm volatile("prefetcht0 %0" :: "m" (*(const unsigned long *)x)); +} + +// XXX only for multiples of 64 bytes, non overlapped. 
+static inline void +pkt_copy(const void *_src, void *_dst, int l) +{ + const uint64_t *src = _src; + uint64_t *dst = _dst; +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + if (unlikely(l >= 1024)) { + bcopy(src, dst, l); + return; + } + for (; l > 0; l-=64) { + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + } +} + +/* + * info on a ring we handle + */ +struct my_ring { + const char *ifname; + int fd; + char *mem; /* userspace mmap address */ + u_int memsize; + u_int queueid; + u_int begin, end; /* first..last+1 rings to check */ + struct netmap_if *nifp; + struct netmap_ring *tx, *rx; /* shortcuts */ + + uint32_t if_flags; + uint32_t if_reqcap; + uint32_t if_curcap; +}; +int netmap_open(struct my_ring *me, int ringid, int promisc); +int netmap_close(struct my_ring *me); +int nm_do_ioctl(struct my_ring *me, u_long what, int subcmd); +#endif /* _NM_UTIL_H */ diff --git a/tools/tools/netmap/pcap.c b/tools/tools/netmap/pcap.c index 2125176b6..c2acd1a4b 100644 --- a/tools/tools/netmap/pcap.c +++ b/tools/tools/netmap/pcap.c @@ -1,5 +1,5 @@ /* - * (C) 2011 Luigi Rizzo + * (C) 2011-2012 Luigi Rizzo * * BSD license * @@ -10,81 +10,18 @@ * $FreeBSD$ */ -#include -#include /* signal */ -#include -#include -#include /* strcmp */ -#include /* open */ -#include /* close */ +#define MY_PCAP +#include "nm_util.h" -#include /* le64toh */ -#include /* PROT_* */ -#include /* ioctl */ -#include -#include -#include /* sockaddr.. */ -#include /* ntohs */ - -#include /* ifreq */ -#include -#include -#include - -#include /* sockaddr_in */ - -#include -#include - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -const char *version = "$Id$"; +char *version = "$Id: pcap.c 11463 2012-07-30 15:26:02Z luigi $"; int verbose = 0; -/* debug support */ -#define ND(format, ...) do {} while (0) -#define D(format, ...) 
do { \ - if (verbose) \ - fprintf(stderr, "--- %s [%d] " format "\n", \ - __FUNCTION__, __LINE__, ##__VA_ARGS__); \ - } while (0) - -static inline void prefetch (const void *x) -{ - __asm volatile("prefetcht0 %0" :: "m" (*(const unsigned long *)x)); -} - -// XXX only for multiples of 64 bytes, non overlapped. -static inline void -pkt_copy(const void *_src, void *_dst, int l) -{ - const uint64_t *src = _src; - uint64_t *dst = _dst; -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) - if (unlikely(l >= 1024)) { - bcopy(src, dst, l); - return; - } - for (; l > 0; l-=64) { - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - } -} - /* * We redefine here a number of structures that are in pcap.h * so we can compile this file without the system header. */ #ifndef PCAP_ERRBUF_SIZE #define PCAP_ERRBUF_SIZE 128 - /* * Each packet is accompanied by a header including the timestamp, * captured size and actual size. 
@@ -135,12 +72,13 @@ typedef enum { PCAP_D_OUT } pcap_direction_t; -struct bpf_program; typedef void (*pcap_handler)(u_char *user, const struct pcap_pkthdr *h, const u_char *bytes); +char errbuf[PCAP_ERRBUF_SIZE]; + pcap_t *pcap_open_live(const char *device, int snaplen, int promisc, int to_ms, char *errbuf); @@ -154,24 +92,6 @@ char *pcap_lookupdev(char *errbuf); int pcap_inject(pcap_t *p, const void *buf, size_t size); int pcap_fileno(pcap_t *p); const char *pcap_lib_version(void); -void pcap_freealldevs(pcap_if_t *); -pcap_t *pcap_create(const char *, char *); -int pcap_activate(pcap_t *); -int pcap_can_set_rfmon(pcap_t *); -int pcap_set_snaplen(pcap_t *, int); -int pcap_snapshot(pcap_t *); -int pcap_lookupnet(const char *, uint32_t *, uint32_t *, char *); -int pcap_set_promisc(pcap_t *, int); -int pcap_set_timeout(pcap_t *, int); -int pcap_compile(pcap_t *, struct bpf_program *, const char *, int, - uint32_t); -int pcap_setfilter(pcap_t *, struct bpf_program *); -int pcap_datalink(pcap_t *); -const char *pcap_datalink_val_to_name(int); -const char *pcap_datalink_val_to_description(int); -int pcap_stats(pcap_t *, struct pcap_stat *); -int pcap_loop(pcap_t *, int, pcap_handler, u_char *); -char *pcap_geterr(pcap_t *); struct eproto { @@ -180,7 +100,7 @@ struct eproto { }; #endif /* !PCAP_ERRBUF_SIZE */ -#ifdef __PIC__ +#ifndef TEST /* * build as a shared library */ @@ -190,8 +110,12 @@ char pcap_version[] = "libnetmap version 0.3"; /* * Our equivalent of pcap_t */ -struct my_ring { - struct nmreq nmr; +struct pcap_ring { + struct my_ring me; +#if 0 + const char *ifname; + + //struct nmreq nmr; int fd; char *mem; /* userspace mmap address */ @@ -200,6 +124,10 @@ struct my_ring { u_int begin, end; /* first..last+1 rings to check */ struct netmap_if *nifp; + uint32_t if_flags; + uint32_t if_reqcap; + uint32_t if_curcap; +#endif int snaplen; char *errbuf; int promisc; @@ -207,9 +135,6 @@ struct my_ring { struct pcap_pkthdr hdr; - uint32_t if_flags; - uint32_t 
if_reqcap; - uint32_t if_curcap; struct pcap_stat st; @@ -217,114 +142,6 @@ struct my_ring { }; -static int -do_ioctl(struct my_ring *me, unsigned long what) -{ - struct ifreq ifr; - int error; - - bzero(&ifr, sizeof(ifr)); - strncpy(ifr.ifr_name, me->nmr.nr_name, sizeof(ifr.ifr_name)); - switch (what) { - case SIOCSIFFLAGS: - D("call SIOCSIFFLAGS 0x%x", me->if_flags); - ifr.ifr_flagshigh = (me->if_flags >> 16) & 0xffff; - ifr.ifr_flags = me->if_flags & 0xffff; - break; - case SIOCSIFCAP: - ifr.ifr_reqcap = me->if_reqcap; - ifr.ifr_curcap = me->if_curcap; - break; - } - error = ioctl(me->fd, what, &ifr); - if (error) { - D("ioctl 0x%lx error %d", what, error); - return error; - } - switch (what) { - case SIOCSIFFLAGS: - case SIOCGIFFLAGS: - me->if_flags = (ifr.ifr_flagshigh << 16) | - (0xffff & ifr.ifr_flags); - D("flags are L 0x%x H 0x%x 0x%x", - (uint16_t)ifr.ifr_flags, - (uint16_t)ifr.ifr_flagshigh, me->if_flags); - break; - - case SIOCGIFCAP: - me->if_reqcap = ifr.ifr_reqcap; - me->if_curcap = ifr.ifr_curcap; - D("curcap are 0x%x", me->if_curcap); - break; - } - return 0; -} - - -/* - * open a device. if me->mem is null then do an mmap. 
- */ -static int -netmap_open(struct my_ring *me, int ringid) -{ - int fd, err, l; - u_int i; - struct nmreq req; - - me->fd = fd = open("/dev/netmap", O_RDWR); - if (fd < 0) { - D("Unable to open /dev/netmap"); - return (-1); - } - bzero(&req, sizeof(req)); - strncpy(req.nr_name, me->nmr.nr_name, sizeof(req.nr_name)); - req.nr_ringid = ringid; - req.nr_version = NETMAP_API; - err = ioctl(fd, NIOCGINFO, &req); - if (err) { - D("cannot get info on %s", me->nmr.nr_name); - goto error; - } - me->memsize = l = req.nr_memsize; - ND("memsize is %d MB", l>>20); - err = ioctl(fd, NIOCREGIF, &req); - if (err) { - D("Unable to register %s", me->nmr.nr_name); - goto error; - } - - if (me->mem == NULL) { - me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); - if (me->mem == MAP_FAILED) { - D("Unable to mmap"); - me->mem = NULL; - goto error; - } - } - - me->nifp = NETMAP_IF(me->mem, req.nr_offset); - me->queueid = ringid; - if (ringid & NETMAP_SW_RING) { - me->begin = req.nr_rx_rings; - me->end = me->begin + 1; - } else if (ringid & NETMAP_HW_RING) { - me->begin = ringid & NETMAP_RING_MASK; - me->end = me->begin + 1; - } else { - me->begin = 0; - me->end = req.nr_rx_rings; - } - /* request timestamps for packets */ - for (i = me->begin; i < me->end; i++) { - struct netmap_ring *ring = NETMAP_RXRING(me->nifp, i); - ring->flags = NR_TIMESTAMP; - } - //me->tx = NETMAP_TXRING(me->nifp, 0); - return (0); -error: - close(me->fd); - return -1; -} /* * There is a set of functions that tcpdump expects even if probably @@ -343,10 +160,12 @@ const char *pcap_lib_version(void) } int -pcap_findalldevs(pcap_if_t **alldevsp, __unused char *errbuf) +pcap_findalldevs(pcap_if_t **alldevsp, char *errbuf) { + pcap_if_t *top = NULL; +#ifndef linux struct ifaddrs *i_head, *i; - pcap_if_t *top = NULL, *cur; + pcap_if_t *cur; struct pcap_addr *tail = NULL; int l; @@ -397,7 +216,7 @@ pcap_findalldevs(pcap_if_t **alldevsp, __unused char *errbuf) } #define SA_NEXT(x) ((struct sockaddr 
*)((char *)(x) + (x)->sa_len)) pca->addr = (struct sockaddr *)(pca + 1); - bcopy(i->ifa_addr, pca->addr, i->ifa_addr->sa_len); + pkt_copy(i->ifa_addr, pca->addr, i->ifa_addr->sa_len); if (i->ifa_netmask) { pca->netmask = SA_NEXT(pca->addr); bcopy(i->ifa_netmask, pca->netmask, i->ifa_netmask->sa_len); @@ -415,12 +234,15 @@ pcap_findalldevs(pcap_if_t **alldevsp, __unused char *errbuf) } freeifaddrs(i_head); +#endif /* !linux */ + (void)errbuf; /* UNUSED */ *alldevsp = top; return 0; } -void pcap_freealldevs(__unused pcap_if_t *alldevs) +void pcap_freealldevs(pcap_if_t *alldevs) { + (void)alldevs; /* UNUSED */ D("unimplemented"); } @@ -447,8 +269,9 @@ pcap_activate(pcap_t *p) } int -pcap_can_set_rfmon(__unused pcap_t *p) +pcap_can_set_rfmon(pcap_t *p) { + (void)p; /* UNUSED */ D(""); return 0; /* no we can't */ } @@ -456,7 +279,7 @@ pcap_can_set_rfmon(__unused pcap_t *p) int pcap_set_snaplen(pcap_t *p, int snaplen) { - struct my_ring *me = p; + struct pcap_ring *me = p; D("len %d", snaplen); me->snaplen = snaplen; @@ -466,7 +289,7 @@ pcap_set_snaplen(pcap_t *p, int snaplen) int pcap_snapshot(pcap_t *p) { - struct my_ring *me = p; + struct pcap_ring *me = p; D("len %d", me->snaplen); return me->snaplen; @@ -474,9 +297,10 @@ pcap_snapshot(pcap_t *p) int pcap_lookupnet(const char *device, uint32_t *netp, - uint32_t *maskp, __unused char *errbuf) + uint32_t *maskp, char *errbuf) { + (void)errbuf; /* UNUSED */ D("device %s", device); inet_aton("10.0.0.255", (struct in_addr *)netp); inet_aton("255.255.255.0",(struct in_addr *) maskp); @@ -486,17 +310,17 @@ pcap_lookupnet(const char *device, uint32_t *netp, int pcap_set_promisc(pcap_t *p, int promisc) { - struct my_ring *me = p; + struct pcap_ring *me = p; D("promisc %d", promisc); - if (do_ioctl(me, SIOCGIFFLAGS)) + if (nm_do_ioctl(&me->me, SIOCGIFFLAGS, 0)) D("SIOCGIFFLAGS failed"); if (promisc) { - me->if_flags |= IFF_PPROMISC; + me->me.if_flags |= IFF_PPROMISC; } else { - me->if_flags &= ~IFF_PPROMISC; + me->me.if_flags 
&= ~IFF_PPROMISC; } - if (do_ioctl(me, SIOCSIFFLAGS)) + if (nm_do_ioctl(&me->me, SIOCSIFFLAGS, 0)) D("SIOCSIFFLAGS failed"); return 0; } @@ -504,7 +328,7 @@ pcap_set_promisc(pcap_t *p, int promisc) int pcap_set_timeout(pcap_t *p, int to_ms) { - struct my_ring *me = p; + struct pcap_ring *me = p; D("%d ms", to_ms); me->to_ms = to_ms; @@ -514,23 +338,30 @@ pcap_set_timeout(pcap_t *p, int to_ms) struct bpf_program; int -pcap_compile(__unused pcap_t *p, __unused struct bpf_program *fp, - const char *str, __unused int optimize, __unused uint32_t netmask) +pcap_compile(pcap_t *p, struct bpf_program *fp, + const char *str, int optimize, uint32_t netmask) { + (void)p; /* UNUSED */ + (void)fp; /* UNUSED */ + (void)optimize; /* UNUSED */ + (void)netmask; /* UNUSED */ D("%s", str); return 0; } int -pcap_setfilter(__unused pcap_t *p, __unused struct bpf_program *fp) +pcap_setfilter(pcap_t *p, struct bpf_program *fp) { + (void)p; /* UNUSED */ + (void)fp; /* UNUSED */ D(""); return 0; } int -pcap_datalink(__unused pcap_t *p) +pcap_datalink(pcap_t *p) { + (void)p; /* UNUSED */ D("returns 1"); return 1; // ethernet } @@ -553,7 +384,7 @@ struct pcap_stat; int pcap_stats(pcap_t *p, struct pcap_stat *ps) { - struct my_ring *me = p; + struct pcap_ring *me = p; ND(""); *ps = me->st; @@ -563,44 +394,42 @@ pcap_stats(pcap_t *p, struct pcap_stat *ps) char * pcap_geterr(pcap_t *p) { - struct my_ring *me = p; + struct pcap_ring *me = p; D(""); return me->msg; } pcap_t * -pcap_open_live(const char *device, __unused int snaplen, - int promisc, int to_ms, __unused char *errbuf) +pcap_open_live(const char *device, int snaplen, + int promisc, int to_ms, char *errbuf) { - struct my_ring *me; + struct pcap_ring *me; + int l; + (void)snaplen; /* UNUSED */ + (void)errbuf; /* UNUSED */ + if (!device) { + D("missing device name"); + return NULL; + } + + l = strlen(device) + 1; D("request to open %s snaplen %d promisc %d timeout %dms", device, snaplen, promisc, to_ms); - me = calloc(1, sizeof(*me)); + 
me = calloc(1, sizeof(*me) + l); if (me == NULL) { D("failed to allocate struct for %s", device); return NULL; } - strncpy(me->nmr.nr_name, device, sizeof(me->nmr.nr_name)); - if (netmap_open(me, 0)) { + me->me.ifname = (char *)(me + 1); + strcpy((char *)me->me.ifname, device); + if (netmap_open(&me->me, 0, promisc)) { D("error opening %s", device); free(me); return NULL; } me->to_ms = to_ms; - if (do_ioctl(me, SIOCGIFFLAGS)) - D("SIOCGIFFLAGS failed"); - if (promisc) { - me->if_flags |= IFF_PPROMISC; - if (do_ioctl(me, SIOCSIFFLAGS)) - D("SIOCSIFFLAGS failed"); - } - if (do_ioctl(me, SIOCGIFCAP)) - D("SIOCGIFCAP failed"); - me->if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE); - if (do_ioctl(me, SIOCSIFCAP)) - D("SIOCSIFCAP failed"); return (pcap_t *)me; } @@ -640,15 +469,19 @@ pcap_get_selectable_fd(pcap_t *p) } int -pcap_setnonblock(__unused pcap_t *p, int nonblock, __unused char *errbuf) +pcap_setnonblock(pcap_t *p, int nonblock, char *errbuf) { + (void)p; /* UNUSED */ + (void)errbuf; /* UNUSED */ D("mode is %d", nonblock); return 0; /* ignore */ } int -pcap_setdirection(__unused pcap_t *p, __unused pcap_direction_t d) +pcap_setdirection(pcap_t *p, pcap_direction_t d) { + (void)p; /* UNUSED */ + (void)d; /* UNUSED */ D(""); return 0; /* ignore */ }; @@ -656,7 +489,8 @@ pcap_setdirection(__unused pcap_t *p, __unused pcap_direction_t d) int pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user) { - struct my_ring *me = p; + struct pcap_ring *pme = p; + struct my_ring *me = &pme->me; int got = 0; u_int si; @@ -669,7 +503,7 @@ pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user) ND("ring has %d pkts", ring->avail); if (ring->avail == 0) continue; - me->hdr.ts = ring->ts; + pme->hdr.ts = ring->ts; /* * XXX a proper prefetch should be done as * prefetch(i); callback(i-1); ... 
@@ -684,15 +518,15 @@ pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user) } u_char *buf = (u_char *)NETMAP_BUF(ring, idx); prefetch(buf); - me->hdr.len = me->hdr.caplen = ring->slot[i].len; + pme->hdr.len = pme->hdr.caplen = ring->slot[i].len; // D("call %p len %d", p, me->hdr.len); - callback(user, &me->hdr, buf); + callback(user, &pme->hdr, buf); ring->cur = NETMAP_RING_NEXT(ring, i); ring->avail--; got++; } } - me->st.ps_recv += got; + pme->st.ps_recv += got; return got; } @@ -732,13 +566,13 @@ pcap_inject(pcap_t *p, const void *buf, size_t size) int pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user) { - struct my_ring *me = p; + struct pcap_ring *me = p; struct pollfd fds[1]; int i; ND("cnt %d", cnt); memset(fds, 0, sizeof(fds)); - fds[0].fd = me->fd; + fds[0].fd = me->me.fd; fds[0].events = (POLLIN); while (cnt == -1 || cnt > 0) { @@ -753,11 +587,10 @@ pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user) return 0; } -#endif /* __PIC__ */ +#endif /* !TEST */ -#ifndef __PIC__ -static void -do_send(u_char *user, const struct pcap_pkthdr *h, const u_char *buf) +#ifdef TEST /* build test code */ +void do_send(u_char *user, const struct pcap_pkthdr *h, const u_char *buf) { pcap_inject((pcap_t *)user, buf, h->caplen); } @@ -819,4 +652,4 @@ main(int argc, char **argv) return (0); } -#endif /* !__PIC__ */ +#endif /* TEST */ diff --git a/tools/tools/netmap/pkt-gen.c b/tools/tools/netmap/pkt-gen.c index d0e64094e..7c2ad984a 100644 --- a/tools/tools/netmap/pkt-gen.c +++ b/tools/tools/netmap/pkt-gen.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved. + * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -25,7 +25,7 @@ /* * $FreeBSD$ - * $Id: pkt-gen.c 10967 2012-05-03 11:29:23Z luigi $ + * $Id: pkt-gen.c 12024 2013-01-25 05:41:51Z luigi $ * * Example program to show how to build a multithreaded packet * source/sink using the netmap device. @@ -36,120 +36,17 @@ * */ -const char *default_payload="netmap pkt-gen Luigi Rizzo and Matteo Landi\n" +#include "nm_util.h" + +const char *default_payload="netmap pkt-gen payload\n" "http://info.iet.unipi.it/~luigi/netmap/ "; -#include -#include /* pthread_* */ -#include /* pthread w/ affinity */ -#include /* signal */ -#include -#include -#include /* PRI* macros */ -#include /* strcmp */ -#include /* open */ -#include /* close */ -#include /* getifaddrs */ - -#include /* PROT_* */ -#include /* ioctl */ -#include -#include /* sockaddr.. */ -#include /* ntohs */ -#include -#include /* cpu_set */ -#include /* sysctl */ -#include /* timersub */ - -#include -#include /* ifreq */ -#include /* LLADDR */ - -#include -#include -#include - -#include -#include -#include - - -static inline int min(int a, int b) { return a < b ? a : b; } - -/* debug support */ -#define D(format, ...) \ - fprintf(stderr, "%s [%d] " format "\n", \ - __FUNCTION__, __LINE__, ##__VA_ARGS__) - -#ifndef EXPERIMENTAL -#define EXPERIMENTAL 0 -#endif +int time_second; // support for RD() debugging macro int verbose = 0; -#define MAX_QUEUES 64 /* no need to limit */ #define SKIP_PAYLOAD 1 /* do not check payload. */ -inline void prefetch (const void *x) -{ - __asm volatile("prefetcht0 %0" :: "m" (*(const unsigned long *)x)); -} - -// XXX only for multiples of 64 bytes, non overlapped. 
-static inline void -pkt_copy(void *_src, void *_dst, int l) -{ - uint64_t *src = _src; - uint64_t *dst = _dst; -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) - if (unlikely(l >= 1024)) { - bcopy(src, dst, l); - return; - } - for (; l > 0; l-=64) { - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - } -} - - -#if EXPERIMENTAL -/* Wrapper around `rdtsc' to take reliable timestamps flushing the pipeline */ -#define netmap_rdtsc(t) \ - do { \ - u_int __regs[4]; \ - \ - do_cpuid(0, __regs); \ - (t) = rdtsc(); \ - } while (0) - -static __inline void -do_cpuid(u_int ax, u_int *p) -{ - __asm __volatile("cpuid" - : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) - : "0" (ax)); -} - -static __inline uint64_t -rdtsc(void) -{ - uint64_t rv; - - __asm __volatile("rdtsc" : "=A" (rv)); - return (rv); -} -#define MAX_SAMPLES 100000 -#endif /* EXPERIMENTAL */ - - struct pkt { struct ether_header eh; struct ip ip; @@ -157,16 +54,29 @@ struct pkt { uint8_t body[2048]; // XXX hardwired } __attribute__((__packed__)); +struct ip_range { + char *name; + struct in_addr start, end, cur; + uint16_t port0, port1, cur_p; +}; + +struct mac_range { + char *name; + struct ether_addr start, end; +}; + /* * global arguments for all threads */ + struct glob_arg { - const char *src_ip; - const char *dst_ip; - const char *src_mac; - const char *dst_mac; + struct ip_range src_ip; + struct ip_range dst_ip; + struct mac_range dst_mac; + struct mac_range src_mac; int pkt_size; int burst; + int forever; int npackets; /* total packets to send */ int nthreads; int cpus; @@ -175,13 +85,20 @@ struct glob_arg { #define OPT_ACCESS 2 #define OPT_COPY 4 #define OPT_MEMCPY 8 - int use_pcap; +#define OPT_TS 16 /* add a timestamp */ + int dev_type; pcap_t *p; -}; -struct mystat { - uint64_t containers[8]; + int affinity; + int main_fd; + int report_interval; + void 
*(*td_body)(void *); + void *mmap_addr; + int mmap_size; + char *ifname; }; +enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP }; + /* * Arguments for a new thread. The same structure is used by @@ -196,43 +113,106 @@ struct targ { struct nmreq nmr; struct netmap_if *nifp; uint16_t qfirst, qlast; /* range of queues to scan */ - uint64_t count; + volatile uint64_t count; struct timeval tic, toc; int me; pthread_t thread; int affinity; - uint8_t dst_mac[6]; - uint8_t src_mac[6]; - u_int dst_mac_range; - u_int src_mac_range; - uint32_t dst_ip; - uint32_t src_ip; - u_int dst_ip_range; - u_int src_ip_range; - struct pkt pkt; }; +/* + * extract the extremes from a range of ipv4 addresses. + * addr_lo[-addr_hi][:port_lo[-port_hi]] + */ +static void +extract_ip_range(struct ip_range *r) +{ + char *p_lo, *p_hi; + char buf1[16]; // one ip address + + D("extract IP range from %s", r->name); + p_lo = index(r->name, ':'); /* do we have ports ? */ + if (p_lo) { + D(" found ports at %s", p_lo); + *p_lo++ = '\0'; + p_hi = index(p_lo, '-'); + if (p_hi) + *p_hi++ = '\0'; + else + p_hi = p_lo; + r->port0 = strtol(p_lo, NULL, 0); + r->port1 = strtol(p_hi, NULL, 0); + if (r->port1 < r->port0) { + r->cur_p = r->port0; + r->port0 = r->port1; + r->port1 = r->cur_p; + } + r->cur_p = r->port0; + D("ports are %d to %d", r->port0, r->port1); + } + p_hi = index(r->name, '-'); /* do we have upper ip ? 
*/ + if (p_hi) { + *p_hi++ = '\0'; + } else + p_hi = r->name; + inet_aton(r->name, &r->start); + inet_aton(p_hi, &r->end); + if (r->start.s_addr > r->end.s_addr) { + r->cur = r->start; + r->start = r->end; + r->end = r->cur; + } + r->cur = r->start; + strncpy(buf1, inet_ntoa(r->end), sizeof(buf1)); + D("range is %s %d to %s %d", inet_ntoa(r->start), r->port0, + buf1, r->port1); +} + +static void +extract_mac_range(struct mac_range *r) +{ + D("extract MAC range from %s", r->name); + bcopy(ether_aton(r->name), &r->start, 6); + bcopy(ether_aton(r->name), &r->end, 6); +#if 0 + bcopy(targ->src_mac, eh->ether_shost, 6); + p = index(targ->g->src_mac, '-'); + if (p) + targ->src_mac_range = atoi(p+1); + + bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); + bcopy(targ->dst_mac, eh->ether_dhost, 6); + p = index(targ->g->dst_mac, '-'); + if (p) + targ->dst_mac_range = atoi(p+1); +#endif + D("%s starts at %s", r->name, ether_ntoa(&r->start)); +} + static struct targ *targs; static int global_nthreads; /* control-C handler */ static void -sigint_h(__unused int sig) +sigint_h(int sig) { - for (int i = 0; i < global_nthreads; i++) - targs[i].cancel = 1; + int i; + (void)sig; /* UNUSED */ + for (i = 0; i < global_nthreads; i++) { + targs[i].cancel = 1; + } signal(SIGINT, SIG_DFL); } - /* sysctl wrapper to return the number of active CPUs */ static int system_ncpus(void) { +#ifdef __FreeBSD__ int mib[2], ncpus; size_t len; @@ -242,8 +222,32 @@ system_ncpus(void) sysctl(mib, 2, &ncpus, &len, NULL, 0); return (ncpus); +#else + return 1; +#endif /* !__FreeBSD__ */ } +#ifdef __linux__ +#define sockaddr_dl sockaddr_ll +#define sdl_family sll_family +#define AF_LINK AF_PACKET +#define LLADDR(s) s->sll_addr; +#include +#define TAP_CLONEDEV "/dev/net/tun" +#endif /* __linux__ */ + +#ifdef __FreeBSD__ +#include +#define TAP_CLONEDEV "/dev/tap" +#endif /* __FreeBSD */ + +#ifdef __APPLE__ +// #warning TAP not supported on apple ? 
+#include +#define TAP_CLONEDEV "/dev/tap" +#endif /* __APPLE__ */ + + /* * locate the src mac address for our interface, put it * into the user-supplied buffer. return 0 if ok, -1 on error. @@ -285,6 +289,7 @@ source_hwaddr(const char *ifname, char *buf) static int setaffinity(pthread_t me, int i) { +#ifdef __FreeBSD__ cpuset_t cpumask; if (i == -1) @@ -298,36 +303,57 @@ setaffinity(pthread_t me, int i) D("Unable to set affinity"); return 1; } +#else + (void)me; /* suppress 'unused' warnings */ + (void)i; +#endif /* __FreeBSD__ */ return 0; } /* Compute the checksum of the given ip header. */ static uint16_t -checksum(const void *data, uint16_t len) +checksum(const void *data, uint16_t len, uint32_t sum) { const uint8_t *addr = data; - uint32_t sum = 0; + uint32_t i; - while (len > 1) { - sum += addr[0] * 256 + addr[1]; - addr += 2; - len -= 2; + /* Checksum all the pairs of bytes first... */ + for (i = 0; i < (len & ~1U); i += 2) { + sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i))); + if (sum > 0xFFFF) + sum -= 0xFFFF; } + /* + * If there's a single byte left over, checksum it, too. + * Network byte order is big-endian, so the remaining byte is + * the high byte. + */ + if (i < len) { + sum += addr[i] << 8; + if (sum > 0xFFFF) + sum -= 0xFFFF; + } + return sum; +} - if (len == 1) - sum += *addr * 256; - - sum = (sum >> 16) + (sum & 0xffff); - sum += (sum >> 16); - - sum = htons(sum); - - return ~sum; +static u_int16_t +wrapsum(u_int32_t sum) +{ + sum = ~sum & 0xFFFF; + return (htons(sum)); } /* * Fill a packet with some payload. + * We create a UDP packet so the payload starts at + * 14+20+8 = 42 bytes. 
*/ +#ifdef __linux__ +#define uh_sport source +#define uh_dport dest +#define uh_ulen len +#define uh_sum check +#endif /* linux */ static void initialize_packet(struct targ *targ) { @@ -335,9 +361,8 @@ initialize_packet(struct targ *targ) struct ether_header *eh; struct ip *ip; struct udphdr *udp; - uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(*ip); + uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip); int i, l, l0 = strlen(default_payload); - char *p; for (i = 0; i < paylen;) { l = min(l0, paylen - i); @@ -345,14 +370,8 @@ initialize_packet(struct targ *targ) i += l; } pkt->body[i-1] = '\0'; - - udp = &pkt->udp; - udp->uh_sport = htons(1234); - udp->uh_dport = htons(4321); - udp->uh_ulen = htons(paylen); - udp->uh_sum = 0; // checksum(udp, sizeof(*udp)); - ip = &pkt->ip; + ip->ip_v = IPVERSION; ip->ip_hl = 5; ip->ip_id = 0; @@ -362,29 +381,36 @@ initialize_packet(struct targ *targ) ip->ip_off = htons(IP_DF); /* Don't fragment */ ip->ip_ttl = IPDEFTTL; ip->ip_p = IPPROTO_UDP; - inet_aton(targ->g->src_ip, (struct in_addr *)&ip->ip_src); - inet_aton(targ->g->dst_ip, (struct in_addr *)&ip->ip_dst); - targ->dst_ip = ip->ip_dst.s_addr; - targ->src_ip = ip->ip_src.s_addr; - p = index(targ->g->src_ip, '-'); - if (p) { - targ->dst_ip_range = atoi(p+1); - D("dst-ip sweep %d addresses", targ->dst_ip_range); - } - ip->ip_sum = checksum(ip, sizeof(*ip)); + ip->ip_dst.s_addr = targ->g->dst_ip.cur.s_addr; + if (++targ->g->dst_ip.cur.s_addr > targ->g->dst_ip.end.s_addr) + targ->g->dst_ip.cur.s_addr = targ->g->dst_ip.start.s_addr; + ip->ip_src.s_addr = targ->g->src_ip.cur.s_addr; + if (++targ->g->src_ip.cur.s_addr > targ->g->src_ip.end.s_addr) + targ->g->src_ip.cur.s_addr = targ->g->src_ip.start.s_addr; + ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0)); - eh = &pkt->eh; - bcopy(ether_aton(targ->g->src_mac), targ->src_mac, 6); - bcopy(targ->src_mac, eh->ether_shost, 6); - p = index(targ->g->src_mac, '-'); - if (p) - targ->src_mac_range = 
atoi(p+1); - bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); - bcopy(targ->dst_mac, eh->ether_dhost, 6); - p = index(targ->g->dst_mac, '-'); - if (p) - targ->dst_mac_range = atoi(p+1); + udp = &pkt->udp; + udp->uh_sport = htons(targ->g->src_ip.cur_p); + if (++targ->g->src_ip.cur_p > targ->g->src_ip.port1) + targ->g->src_ip.cur_p = targ->g->src_ip.port0; + udp->uh_dport = htons(targ->g->dst_ip.cur_p); + if (++targ->g->dst_ip.cur_p > targ->g->dst_ip.port1) + targ->g->dst_ip.cur_p = targ->g->dst_ip.port0; + udp->uh_ulen = htons(paylen); + /* Magic: taken from sbin/dhclient/packet.c */ + udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp), + checksum(pkt->body, + paylen - sizeof(*udp), + checksum(&ip->ip_src, 2 * sizeof(ip->ip_src), + IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen) + ) + ) + )); + + eh = &pkt->eh; + bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); + bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); eh->ether_type = htons(ETHERTYPE_IP); } @@ -452,7 +478,6 @@ send_packets(struct netmap_ring *ring, struct pkt *pkt, memcpy(p, pkt, size); else if (options & OPT_PREFETCH) prefetch(p); - slot->len = size; if (sent == count - 1) slot->flags |= NS_REPORT; @@ -464,23 +489,198 @@ send_packets(struct netmap_ring *ring, struct pkt *pkt, return (sent); } +/* + * Send a packet, and wait for a response. + * The payload (after UDP header, ofs 42) has a 4-byte sequence + * followed by a struct timeval (or bintime?) + */ +#define PAY_OFS 42 /* where in the pkt... 
*/ + +static void * +pinger_body(void *data) +{ + struct targ *targ = (struct targ *) data; + struct pollfd fds[1]; + struct netmap_if *nifp = targ->nifp; + int i, rx = 0, n = targ->g->npackets; + + fds[0].fd = targ->fd; + fds[0].events = (POLLIN); + static uint32_t sent; + struct timespec ts, now, last_print; + uint32_t count = 0, min = 1000000000, av = 0; + + if (targ->g->nthreads > 1) { + D("can only ping with 1 thread"); + return NULL; + } + + clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); + while (n == 0 || (int)sent < n) { + struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); + struct netmap_slot *slot; + char *p; + for (i = 0; i < 1; i++) { + slot = &ring->slot[ring->cur]; + slot->len = targ->g->pkt_size; + p = NETMAP_BUF(ring, slot->buf_idx); + + if (ring->avail == 0) { + D("-- ouch, cannot send"); + } else { + pkt_copy(&targ->pkt, p, targ->g->pkt_size); + clock_gettime(CLOCK_REALTIME_PRECISE, &ts); + bcopy(&sent, p+42, sizeof(sent)); + bcopy(&ts, p+46, sizeof(ts)); + sent++; + ring->cur = NETMAP_RING_NEXT(ring, ring->cur); + ring->avail--; + } + } + /* should use a parameter to decide how often to send */ + if (poll(fds, 1, 3000) <= 0) { + D("poll error/timeout on queue %d", targ->me); + continue; + } + /* see what we got back */ + for (i = targ->qfirst; i < targ->qlast; i++) { + ring = NETMAP_RXRING(nifp, i); + while (ring->avail > 0) { + uint32_t seq; + slot = &ring->slot[ring->cur]; + p = NETMAP_BUF(ring, slot->buf_idx); + + clock_gettime(CLOCK_REALTIME_PRECISE, &now); + bcopy(p+42, &seq, sizeof(seq)); + bcopy(p+46, &ts, sizeof(ts)); + ts.tv_sec = now.tv_sec - ts.tv_sec; + ts.tv_nsec = now.tv_nsec - ts.tv_nsec; + if (ts.tv_nsec < 0) { + ts.tv_nsec += 1000000000; + ts.tv_sec--; + } + if (1) D("seq %d/%d delta %d.%09d", seq, sent, + (int)ts.tv_sec, (int)ts.tv_nsec); + if (ts.tv_nsec < (int)min) + min = ts.tv_nsec; + count ++; + av += ts.tv_nsec; + ring->avail--; + ring->cur = NETMAP_RING_NEXT(ring, ring->cur); + rx++; + } + } + //D("tx %d rx %d", 
sent, rx); + //usleep(100000); + ts.tv_sec = now.tv_sec - last_print.tv_sec; + ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; + if (ts.tv_nsec < 0) { + ts.tv_nsec += 1000000000; + ts.tv_sec--; + } + if (ts.tv_sec >= 1) { + D("count %d min %d av %d", + count, min, av/count); + count = 0; + av = 0; + min = 100000000; + last_print = now; + } + } + return NULL; +} + + +/* + * reply to ping requests + */ +static void * +ponger_body(void *data) +{ + struct targ *targ = (struct targ *) data; + struct pollfd fds[1]; + struct netmap_if *nifp = targ->nifp; + struct netmap_ring *txring, *rxring; + int i, rx = 0, sent = 0, n = targ->g->npackets; + fds[0].fd = targ->fd; + fds[0].events = (POLLIN); + + if (targ->g->nthreads > 1) { + D("can only reply ping with 1 thread"); + return NULL; + } + D("understood ponger %d but don't know how to do it", n); + while (n == 0 || sent < n) { + uint32_t txcur, txavail; +//#define BUSYWAIT +#ifdef BUSYWAIT + ioctl(fds[0].fd, NIOCRXSYNC, NULL); +#else + if (poll(fds, 1, 1000) <= 0) { + D("poll error/timeout on queue %d", targ->me); + continue; + } +#endif + txring = NETMAP_TXRING(nifp, 0); + txcur = txring->cur; + txavail = txring->avail; + /* see what we got back */ + for (i = targ->qfirst; i < targ->qlast; i++) { + rxring = NETMAP_RXRING(nifp, i); + while (rxring->avail > 0) { + uint16_t *spkt, *dpkt; + uint32_t cur = rxring->cur; + struct netmap_slot *slot = &rxring->slot[cur]; + char *src, *dst; + src = NETMAP_BUF(rxring, slot->buf_idx); + //D("got pkt %p of size %d", src, slot->len); + rxring->avail--; + rxring->cur = NETMAP_RING_NEXT(rxring, cur); + rx++; + if (txavail == 0) + continue; + dst = NETMAP_BUF(txring, + txring->slot[txcur].buf_idx); + /* copy... 
*/ + dpkt = (uint16_t *)dst; + spkt = (uint16_t *)src; + pkt_copy(src, dst, slot->len); + dpkt[0] = spkt[3]; + dpkt[1] = spkt[4]; + dpkt[2] = spkt[5]; + dpkt[3] = spkt[0]; + dpkt[4] = spkt[1]; + dpkt[5] = spkt[2]; + txring->slot[txcur].len = slot->len; + /* XXX swap src dst mac */ + txcur = NETMAP_RING_NEXT(txring, txcur); + txavail--; + sent++; + } + } + txring->cur = txcur; + txring->avail = txavail; + targ->count = sent; +#ifdef BUSYWAIT + ioctl(fds[0].fd, NIOCTXSYNC, NULL); +#endif + //D("tx %d rx %d", sent, rx); + } + return NULL; +} + + static void * sender_body(void *data) { struct targ *targ = (struct targ *) data; + struct pollfd fds[1]; struct netmap_if *nifp = targ->nifp; struct netmap_ring *txring; - int i, pkts_per_td = targ->g->npackets / targ->g->nthreads, sent = 0; - int continuous = 0; + int i, n = targ->g->npackets / targ->g->nthreads, sent = 0; int options = targ->g->options | OPT_COPY; - int retval; - D("start"); - if (pkts_per_td == 0) { - continuous = 1; - pkts_per_td = 100000; - } if (setaffinity(targ->thread, targ->affinity)) goto quit; /* setup poll(2) mechanism. 
*/ @@ -490,45 +690,56 @@ D("start"); /* main loop.*/ gettimeofday(&targ->tic, NULL); - if (targ->g->use_pcap) { - int size = targ->g->pkt_size; - void *pkt = &targ->pkt; - pcap_t *p = targ->g->p; - for (i = 0; (sent < pkts_per_td && !targ->cancel) || continuous; i++) { + if (targ->g->dev_type == DEV_PCAP) { + int size = targ->g->pkt_size; + void *pkt = &targ->pkt; + pcap_t *p = targ->g->p; + + for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { if (pcap_inject(p, pkt, size) != -1) sent++; if (i > 10000) { targ->count = sent; i = 0; } - } + } + } else if (targ->g->dev_type == DEV_TAP) { /* tap */ + int size = targ->g->pkt_size; + void *pkt = &targ->pkt; + D("writing to file desc %d", targ->g->main_fd); + + for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { + if (write(targ->g->main_fd, pkt, size) != -1) + sent++; + if (i > 10000) { + targ->count = sent; + i = 0; + } + } } else { - while (sent < pkts_per_td || continuous) { + while (!targ->cancel && (n == 0 || sent < n)) { /* * wait for available room in the send queue(s) */ - if ((retval = poll(fds, 1, 2000)) <= 0) { + if (poll(fds, 1, 2000) <= 0) { if (targ->cancel) break; - if (retval == 0) - D("poll timeout on queue %d\n", targ->me); - else - D("poll error on queue %d: %s\n", targ->me, - strerror(errno)); + D("poll error/timeout on queue %d", targ->me); goto quit; } /* * scan our queues and send on those with room */ - if (sent > 100000 && !(targ->g->options & OPT_COPY) ) + if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) { + D("drop copy"); options &= ~OPT_COPY; - for (i = targ->qfirst; i < targ->qlast && !targ->cancel; i++) { + } + for (i = targ->qfirst; i < targ->qlast; i++) { int m, limit = targ->g->burst; - if (!continuous && pkts_per_td - sent < limit) - limit = pkts_per_td - sent; - + if (n > 0 && n - sent < limit) + limit = n - sent; txring = NETMAP_TXRING(nifp, i); if (txring->avail == 0) continue; @@ -537,8 +748,6 @@ D("start"); sent += m; targ->count = sent; } - 
if (targ->cancel) - break; } /* flush any remaining packets */ ioctl(fds[0].fd, NIOCTXSYNC, NULL); @@ -566,10 +775,12 @@ quit: static void -receive_pcap(u_char *user, __unused const struct pcap_pkthdr * h, - __unused const u_char * bytes) +receive_pcap(u_char *user, const struct pcap_pkthdr * h, + const u_char * bytes) { int *count = (int *)user; + (void)h; /* UNUSED */ + (void)bytes; /* UNUSED */ (*count)++; } @@ -603,7 +814,8 @@ receiver_body(void *data) struct pollfd fds[1]; struct netmap_if *nifp = targ->nifp; struct netmap_ring *rxring; - int i, received = 0; + int i; + uint64_t received = 0; if (setaffinity(targ->thread, targ->affinity)) goto quit; @@ -614,7 +826,7 @@ receiver_body(void *data) fds[0].events = (POLLIN); /* unbounded wait for the first packet. */ - while (!targ->cancel) { + for (;;) { i = poll(fds, 1, 1000); if (i > 0 && !(fds[0].revents & POLLERR)) break; @@ -623,15 +835,24 @@ receiver_body(void *data) /* main loop, exit after 1s silence */ gettimeofday(&targ->tic, NULL); - if (targ->g->use_pcap) { + if (targ->g->dev_type == DEV_PCAP) { while (!targ->cancel) { + /* XXX should we poll ? */ pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL); } + } else if (targ->g->dev_type == DEV_TAP) { + D("reading from %s fd %d", targ->g->ifname, targ->g->main_fd); + while (!targ->cancel) { + char buf[2048]; + /* XXX should we poll ? */ + if (read(targ->g->main_fd, buf, sizeof(buf)) > 0) + targ->count++; + } } else { while (!targ->cancel) { /* Once we started to receive packets, wait at most 1 seconds before quitting. */ - if (poll(fds, 1, 1 * 1000) <= 0) { + if (poll(fds, 1, 1 * 1000) <= 0 && targ->g->forever == 0) { gettimeofday(&targ->toc, NULL); targ->toc.tv_sec -= 1; /* Subtract timeout time. 
*/ break; @@ -647,8 +868,8 @@ receiver_body(void *data) m = receive_packets(rxring, targ->g->burst, SKIP_PAYLOAD); received += m; - targ->count = received; } + targ->count = received; // tell the card we have read the data //ioctl(fds[0].fd, NIOCRXSYNC, NULL); @@ -665,59 +886,55 @@ quit: return (NULL); } -static char * -scaled_val(double val) +/* very crude code to print a number in normalized form. + * Caller has to make sure that the buffer is large enough. + */ +static const char * +norm(char *buf, double val) { - static char buf[64]; - const char *units[] = {"", "K", "M", "G"}; - int i = 0; + char *units[] = { "", "K", "M", "G" }; + u_int i; - while (val >= 1000 && i < 3) { + for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *); i++) val /= 1000; - i++; - } - snprintf(buf, sizeof(buf), "%.2f%s", val, units[i]); - return (buf); + sprintf(buf, "%.2f %s", val, units[i]); + return buf; } static void tx_output(uint64_t sent, int size, double delta) { - uint64_t bytes_sent = sent * size; - double bw = 8.0 * bytes_sent / delta; - double pps = sent / delta; - /* - * Assume Ethernet overhead of 24 bytes per packet excluding header: - * FCS 4 bytes - * Preamble 8 bytes - * IFG 12 bytes - */ - double bw_with_overhead = 8.0 * (bytes_sent + sent * 24) / delta; + double bw, raw_bw, pps; + char b1[40], b2[80], b3[80]; printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n", sent, size, delta); - printf("Speed: %spps. 
", scaled_val(pps)); - printf("Bandwidth: %sbps ", scaled_val(bw)); - printf("(%sbps with overhead).\n", scaled_val(bw_with_overhead)); - + if (delta == 0) + delta = 1e-6; + if (size < 60) /* correct for min packet size */ + size = 60; + pps = sent / delta; + bw = (8.0 * size * sent) / delta; + /* raw packets have4 bytes crc + 20 bytes framing */ + raw_bw = (8.0 * (size + 24) * sent) / delta; + + printf("Speed: %spps Bandwidth: %sbps (raw %sbps)\n", + norm(b1, pps), norm(b2, bw), norm(b3, raw_bw) ); } static void rx_output(uint64_t received, double delta) { - - double pps = received / delta; - char units[4] = { '\0', 'K', 'M', 'G' }; - int punit = 0; - - while (pps >= 1000) { - pps /= 1000; - punit += 1; - } + double pps; + char b1[40]; printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta); - printf("Speed: %.2f%cpps.\n", pps, units[punit]); + + if (delta == 0) + delta = 1e-6; + pps = received / delta; + printf("Speed: %spps\n", norm(b1, pps)); } static void @@ -728,17 +945,21 @@ usage(void) "Usage:\n" "%s arguments\n" "\t-i interface interface name\n" - "\t-t pkts_to_send also forces send mode, 0 = continuous\n" - "\t-r pkts_to_receive also forces receive mode\n" + "\t-f function tx rx ping pong\n" + "\t-n count number of iterations (can be 0)\n" + "\t-t pkts_to_send also forces tx mode\n" + "\t-r pkts_to_receive also forces rx mode\n" "\t-l pkts_size in bytes excluding CRC\n" "\t-d dst-ip end with %%n to sweep n addresses\n" "\t-s src-ip end with %%n to sweep n addresses\n" "\t-D dst-mac end with %%n to sweep n addresses\n" "\t-S src-mac end with %%n to sweep n addresses\n" + "\t-a cpu_id use setaffinity\n" "\t-b burst size testing, mostly\n" "\t-c cores cores to use\n" "\t-p threads processes/threads to use\n" "\t-T report_ms milliseconds between reports\n" + "\t-P use libpcap instead of netmap\n" "\t-w wait_for_link_time in seconds\n" "", cmd); @@ -746,71 +967,341 @@ usage(void) exit(0); } +static void +start_threads(struct glob_arg *g) +{ 
+ int i; + + targs = calloc(g->nthreads, sizeof(*targs)); + /* + * Now create the desired number of threads, each one + * using a single descriptor. + */ + for (i = 0; i < g->nthreads; i++) { + bzero(&targs[i], sizeof(targs[i])); + targs[i].fd = -1; /* default, with pcap */ + targs[i].g = g; + + if (g->dev_type == DEV_NETMAP) { + struct nmreq tifreq; + int tfd; + + /* register interface. */ + tfd = open("/dev/netmap", O_RDWR); + if (tfd == -1) { + D("Unable to open /dev/netmap"); + continue; + } + targs[i].fd = tfd; + + bzero(&tifreq, sizeof(tifreq)); + strncpy(tifreq.nr_name, g->ifname, sizeof(tifreq.nr_name)); + tifreq.nr_version = NETMAP_API; + tifreq.nr_ringid = (g->nthreads > 1) ? (i | NETMAP_HW_RING) : 0; + + /* + * if we are acting as a receiver only, do not touch the transmit ring. + * This is not the default because many apps may use the interface + * in both directions, but a pure receiver does not. + */ + if (g->td_body == receiver_body) { + tifreq.nr_ringid |= NETMAP_NO_TX_POLL; + } + + if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) { + D("Unable to register %s", g->ifname); + continue; + } + targs[i].nmr = tifreq; + targs[i].nifp = NETMAP_IF(g->mmap_addr, tifreq.nr_offset); + /* start threads. */ + targs[i].qfirst = (g->nthreads > 1) ? i : 0; + targs[i].qlast = (g->nthreads > 1) ? i+1 : + (g->td_body == receiver_body ? 
tifreq.nr_rx_rings : tifreq.nr_tx_rings); + } else { + targs[i].fd = g->main_fd; + } + targs[i].used = 1; + targs[i].me = i; + if (g->affinity >= 0) { + if (g->affinity < g->cpus) + targs[i].affinity = g->affinity; + else + targs[i].affinity = i % g->cpus; + } else + targs[i].affinity = -1; + /* default, init packets */ + initialize_packet(&targs[i]); + + if (pthread_create(&targs[i].thread, NULL, g->td_body, + &targs[i]) == -1) { + D("Unable to create thread %d", i); + targs[i].used = 0; + } + } +} + +static void +main_thread(struct glob_arg *g) +{ + int i; + + uint64_t prev = 0; + uint64_t count = 0; + double delta_t; + struct timeval tic, toc; + + gettimeofday(&toc, NULL); + for (;;) { + struct timeval now, delta; + uint64_t pps, usec, my_count, npkts; + int done = 0; + + delta.tv_sec = g->report_interval/1000; + delta.tv_usec = (g->report_interval%1000)*1000; + select(0, NULL, NULL, NULL, &delta); + gettimeofday(&now, NULL); + time_second = now.tv_sec; + timersub(&now, &toc, &toc); + my_count = 0; + for (i = 0; i < g->nthreads; i++) { + my_count += targs[i].count; + if (targs[i].used == 0) + done++; + } + usec = toc.tv_sec* 1000000 + toc.tv_usec; + if (usec < 10000) + continue; + npkts = my_count - prev; + pps = (npkts*1000000 + usec/2) / usec; + D("%" PRIu64 " pps (%" PRIu64 " pkts in %" PRIu64 " usec)", + pps, npkts, usec); + prev = my_count; + toc = now; + if (done == g->nthreads) + break; + } + + timerclear(&tic); + timerclear(&toc); + for (i = 0; i < g->nthreads; i++) { + /* + * Join active threads, unregister interfaces and close + * file descriptors. + */ + pthread_join(targs[i].thread, NULL); + close(targs[i].fd); + + if (targs[i].completed == 0) + D("ouch, thread %d exited with error", i); + + /* + * Collect threads output and extract information about + * how long it took to send all the packets. 
+ */ + count += targs[i].count; + if (!timerisset(&tic) || timercmp(&targs[i].tic, &tic, <)) + tic = targs[i].tic; + if (!timerisset(&toc) || timercmp(&targs[i].toc, &toc, >)) + toc = targs[i].toc; + } + + /* print output. */ + timersub(&toc, &tic, &toc); + delta_t = toc.tv_sec + 1e-6* toc.tv_usec; + if (g->td_body == sender_body) + tx_output(count, g->pkt_size, delta_t); + else + rx_output(count, delta_t); + + if (g->dev_type == DEV_NETMAP) { + ioctl(g->main_fd, NIOCUNREGIF, NULL); // XXX deprecated + munmap(g->mmap_addr, g->mmap_size); + close(g->main_fd); + } +} + + +struct sf { + char *key; + void *f; +}; + +static struct sf func[] = { + { "tx", sender_body }, + { "rx", receiver_body }, + { "ping", pinger_body }, + { "pong", ponger_body }, + { NULL, NULL } +}; + +static int +tap_alloc(char *dev) +{ + struct ifreq ifr; + int fd, err; + char *clonedev = TAP_CLONEDEV; + + (void)err; + (void)dev; + /* Arguments taken by the function: + * + * char *dev: the name of an interface (or '\0'). MUST have enough + * space to hold the interface name if '\0' is passed + * int flags: interface flags (eg, IFF_TUN etc.) 
+ */ + +#ifdef __FreeBSD__ + if (dev[3]) { /* tapSomething */ + static char buf[128]; + snprintf(buf, sizeof(buf), "/dev/%s", dev); + clonedev = buf; + } +#endif + /* open the device */ + if( (fd = open(clonedev, O_RDWR)) < 0 ) { + return fd; + } + D("%s open successful", clonedev); + + /* preparation of the struct ifr, of type "struct ifreq" */ + memset(&ifr, 0, sizeof(ifr)); + +#ifdef linux + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + + if (*dev) { + /* if a device name was specified, put it in the structure; otherwise, + * the kernel will try to allocate the "next" device of the + * specified type */ + strncpy(ifr.ifr_name, dev, IFNAMSIZ); + } + + /* try to create the device */ + if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { + D("failed to to a TUNSETIFF"); + close(fd); + return err; + } + + /* if the operation was successful, write back the name of the + * interface to the variable "dev", so the caller can know + * it. Note that the caller MUST reserve space in *dev (see calling + * code below) */ + strcpy(dev, ifr.ifr_name); + D("new name is %s", dev); +#endif /* linux */ + + /* this is the special file descriptor that the caller will use to talk + * with the virtual interface */ + return fd; +} int main(int arc, char **argv) { - int i, fd; - char pcap_errbuf[PCAP_ERRBUF_SIZE]; + int i; struct glob_arg g; struct nmreq nmr; - void *mmap_addr; /* the mmap address */ - void *(*td_body)(void *) = receiver_body; int ch; - int report_interval = 1000; /* report interval */ - char *ifname = NULL; int wait_link = 2; int devqueues = 1; /* how many device queues */ bzero(&g, sizeof(g)); - g.src_ip = "10.0.0.1"; - g.dst_ip = "10.1.0.1"; - g.dst_mac = "ff:ff:ff:ff:ff:ff"; - g.src_mac = NULL; + g.main_fd = -1; + g.td_body = receiver_body; + g.report_interval = 1000; /* report interval */ + g.affinity = -1; + /* ip addresses can also be a range x.x.x.x-x.x.x.y */ + g.src_ip.name = "10.0.0.1"; + g.dst_ip.name = "10.1.0.1"; + g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; + 
g.src_mac.name = NULL; g.pkt_size = 60; g.burst = 512; // default g.nthreads = 1; g.cpus = 1; while ( (ch = getopt(arc, argv, - "i:t:r:l:d:s:D:S:b:c:o:p:PT:w:v")) != -1) { + "a:f:n:i:t:r:l:d:s:D:S:b:c:o:p:PT:w:Wv")) != -1) { + struct sf *fn; + switch(ch) { default: D("bad option %c %s", ch, optarg); usage(); break; - case 'o': + + case 'n': + g.npackets = atoi(optarg); + break; + + case 'f': + for (fn = func; fn->key; fn++) { + if (!strcmp(fn->key, optarg)) + break; + } + if (fn->key) + g.td_body = fn->f; + else + D("unrecognised function %s", optarg); + break; + + case 'o': /* data generation options */ g.options = atoi(optarg); break; + + case 'a': /* force affinity */ + g.affinity = atoi(optarg); + break; + case 'i': /* interface */ - ifname = optarg; + g.ifname = optarg; + if (!strncmp(optarg, "tap", 3)) + g.dev_type = DEV_TAP; + else + g.dev_type = DEV_NETMAP; break; - case 't': /* send */ - td_body = sender_body; + + case 't': /* send, deprecated */ + D("-t deprecated, please use -f tx -n %s", optarg); + g.td_body = sender_body; g.npackets = atoi(optarg); break; + case 'r': /* receive */ - td_body = receiver_body; + D("-r deprecated, please use -f rx -n %s", optarg); + g.td_body = receiver_body; g.npackets = atoi(optarg); break; + case 'l': /* pkt_size */ g.pkt_size = atoi(optarg); break; + case 'd': - g.dst_ip = optarg; + g.dst_ip.name = optarg; break; + case 's': - g.src_ip = optarg; + g.src_ip.name = optarg; break; + case 'T': /* report interval */ - report_interval = atoi(optarg); + g.report_interval = atoi(optarg); break; + case 'w': wait_link = atoi(optarg); break; + + case 'W': + g.forever = 1; /* do not exit rx even with no traffic */ + break; + case 'b': /* burst */ g.burst = atoi(optarg); break; @@ -822,61 +1313,71 @@ main(int arc, char **argv) break; case 'P': - g.use_pcap = 1; + g.dev_type = DEV_PCAP; break; case 'D': /* destination mac */ - g.dst_mac = optarg; - { - struct ether_addr *mac = ether_aton(g.dst_mac); - D("ether_aton(%s) gives %p", 
g.dst_mac, mac); - } + g.dst_mac.name = optarg; break; + case 'S': /* source mac */ - g.src_mac = optarg; + g.src_mac.name = optarg; break; case 'v': verbose++; } } - if (ifname == NULL) { + if (g.ifname == NULL) { D("missing ifname"); usage(); } - { - int n = system_ncpus(); - if (g.cpus < 0 || g.cpus > n) { - D("%d cpus is too high, have only %d cpus", g.cpus, n); - usage(); - } - if (g.cpus == 0) - g.cpus = n; + + i = system_ncpus(); + if (g.cpus < 0 || g.cpus > i) { + D("%d cpus is too high, have only %d cpus", g.cpus, i); + usage(); } + if (g.cpus == 0) + g.cpus = i; + if (g.pkt_size < 16 || g.pkt_size > 1536) { D("bad pktsize %d\n", g.pkt_size); usage(); } - if (td_body == sender_body && g.src_mac == NULL) { - static char mybuf[20] = "ff:ff:ff:ff:ff:ff"; + if (g.src_mac.name == NULL) { + static char mybuf[20] = "00:00:00:00:00:00"; /* retrieve source mac address. */ - if (source_hwaddr(ifname, mybuf) == -1) { + if (source_hwaddr(g.ifname, mybuf) == -1) { D("Unable to retrieve source mac"); // continue, fail later } - g.src_mac = mybuf; + g.src_mac.name = mybuf; } + /* extract address ranges */ + extract_ip_range(&g.src_ip); + extract_ip_range(&g.dst_ip); + extract_mac_range(&g.src_mac); + extract_mac_range(&g.dst_mac); + + if (g.dev_type == DEV_TAP) { + D("want to use tap %s", g.ifname); + g.main_fd = tap_alloc(g.ifname); + if (g.main_fd < 0) { + D("cannot open tap %s", g.ifname); + usage(); + } + } else if (g.dev_type > DEV_NETMAP) { + char pcap_errbuf[PCAP_ERRBUF_SIZE]; - if (g.use_pcap) { - D("using pcap on %s", ifname); - g.p = pcap_open_live(ifname, 0, 1, 100, pcap_errbuf); + D("using pcap on %s", g.ifname); + pcap_errbuf[0] = '\0'; // init the buffer + g.p = pcap_open_live(g.ifname, 0, 1, 100, pcap_errbuf); if (g.p == NULL) { - D("cannot open pcap on %s", ifname); + D("cannot open pcap on %s", g.ifname); usage(); } - mmap_addr = NULL; - fd = -1; } else { bzero(&nmr, sizeof(nmr)); nmr.nr_version = NETMAP_API; @@ -889,21 +1390,21 @@ main(int arc, char 
**argv) * which in turn may take some time for the PHY to * reconfigure. */ - fd = open("/dev/netmap", O_RDWR); - if (fd == -1) { + g.main_fd = open("/dev/netmap", O_RDWR); + if (g.main_fd == -1) { D("Unable to open /dev/netmap"); - exit(1); + // fail later } else { - if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) { + if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) { D("Unable to get if info without name"); } else { D("map size is %d Kb", nmr.nr_memsize >> 10); } bzero(&nmr, sizeof(nmr)); nmr.nr_version = NETMAP_API; - strncpy(nmr.nr_name, ifname, sizeof(nmr.nr_name)); - if ((ioctl(fd, NIOCGINFO, &nmr)) == -1) { - D("Unable to get if info for %s", ifname); + strncpy(nmr.nr_name, g.ifname, sizeof(nmr.nr_name)); + if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) { + D("Unable to get if info for %s", g.ifname); } devqueues = nmr.nr_rx_rings; } @@ -919,11 +1420,12 @@ main(int arc, char **argv) * inside the body of the threads, we prefer to keep this * operation here to simplify the thread logic. */ - D("mmapping %d Kbytes", nmr.nr_memsize>>10); - mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize, + D("mapping %d Kbytes", nmr.nr_memsize>>10); + g.mmap_size = nmr.nr_memsize; + g.mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize, PROT_WRITE | PROT_READ, - MAP_SHARED, fd, 0); - if (mmap_addr == MAP_FAILED) { + MAP_SHARED, g.main_fd, 0); + if (g.mmap_addr == MAP_FAILED) { D("Unable to mmap %d KB", nmr.nr_memsize >> 10); // continue, fail later } @@ -937,8 +1439,8 @@ main(int arc, char **argv) * give time to cards that take a long time to reset the PHY. */ nmr.nr_version = NETMAP_API; - if (ioctl(fd, NIOCREGIF, &nmr) == -1) { - D("Unable to register interface %s", ifname); + if (ioctl(g.main_fd, NIOCREGIF, &nmr) == -1) { + D("Unable to register interface %s", g.ifname); //continue, fail later } @@ -946,19 +1448,19 @@ main(int arc, char **argv) /* Print some debug information. */ fprintf(stdout, "%s %s: %d queues, %d threads and %d cpus.\n", - (td_body == sender_body) ? 
"Sending on" : "Receiving from", - ifname, + (g.td_body == sender_body) ? "Sending on" : "Receiving from", + g.ifname, devqueues, g.nthreads, g.cpus); - if (td_body == sender_body) { + if (g.td_body == sender_body) { fprintf(stdout, "%s -> %s (%s -> %s)\n", - g.src_ip, g.dst_ip, - g.src_mac, g.dst_mac); + g.src_ip.name, g.dst_ip.name, + g.src_mac.name, g.dst_mac.name); } /* Exit if something went wrong. */ - if (fd < 0) { + if (g.main_fd < 0) { D("aborting"); usage(); } @@ -980,155 +1482,19 @@ main(int arc, char **argv) global_nthreads = g.nthreads; signal(SIGINT, sigint_h); - if (g.use_pcap) { - g.p = pcap_open_live(ifname, 0, 1, 100, NULL); +#if 0 // XXX this is not needed, i believe + if (g.dev_type > DEV_NETMAP) { + g.p = pcap_open_live(g.ifname, 0, 1, 100, NULL); if (g.p == NULL) { - D("cannot open pcap on %s", ifname); + D("cannot open pcap on %s", g.ifname); usage(); } else - D("using pcap %p on %s", g.p, ifname); - } - - targs = calloc(g.nthreads, sizeof(*targs)); - /* - * Now create the desired number of threads, each one - * using a single descriptor. - */ - for (i = 0; i < g.nthreads; i++) { - struct netmap_if *tnifp; - struct nmreq tifreq; - int tfd; - - if (g.use_pcap) { - tfd = -1; - tnifp = NULL; - } else { - /* register interface. */ - tfd = open("/dev/netmap", O_RDWR); - if (tfd == -1) { - D("Unable to open /dev/netmap"); - continue; - } - - bzero(&tifreq, sizeof(tifreq)); - strncpy(tifreq.nr_name, ifname, sizeof(tifreq.nr_name)); - tifreq.nr_version = NETMAP_API; - tifreq.nr_ringid = (g.nthreads > 1) ? (i | NETMAP_HW_RING) : 0; - - /* - * if we are acting as a receiver only, do not touch the transmit ring. - * This is not the default because many apps may use the interface - * in both directions, but a pure receiver does not. 
- */ - if (td_body == receiver_body) { - tifreq.nr_ringid |= NETMAP_NO_TX_POLL; - } - - if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) { - D("Unable to register %s", ifname); - continue; - } - tnifp = NETMAP_IF(mmap_addr, tifreq.nr_offset); - } - /* start threads. */ - bzero(&targs[i], sizeof(targs[i])); - targs[i].g = &g; - targs[i].used = 1; - targs[i].completed = 0; - targs[i].fd = tfd; - targs[i].nmr = tifreq; - targs[i].nifp = tnifp; - targs[i].qfirst = (g.nthreads > 1) ? i : 0; - targs[i].qlast = (g.nthreads > 1) ? i+1 : - (td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings); - targs[i].me = i; - targs[i].affinity = g.cpus ? i % g.cpus : -1; - if (td_body == sender_body) { - /* initialize the packet to send. */ - initialize_packet(&targs[i]); - } - - if (pthread_create(&targs[i].thread, NULL, td_body, - &targs[i]) == -1) { - D("Unable to create thread %d", i); - targs[i].used = 0; - } - } - - { - uint64_t my_count = 0, prev = 0; - uint64_t count = 0; - double delta_t; - struct timeval tic, toc; - - gettimeofday(&toc, NULL); - for (;;) { - struct timeval now, delta; - uint64_t pps; - int done = 0; - - delta.tv_sec = report_interval/1000; - delta.tv_usec = (report_interval%1000)*1000; - select(0, NULL, NULL, NULL, &delta); - gettimeofday(&now, NULL); - timersub(&now, &toc, &toc); - my_count = 0; - for (i = 0; i < g.nthreads; i++) { - my_count += targs[i].count; - if (targs[i].used == 0) - done++; - } - pps = toc.tv_sec* 1000000 + toc.tv_usec; - if (pps < 10000) - continue; - pps = ((my_count - prev) * 1000000 + pps / 2) / pps; - D("%" PRIu64 " pps", pps); - prev = my_count; - toc = now; - if (done == g.nthreads) - break; - } - - timerclear(&tic); - timerclear(&toc); - for (i = 0; i < g.nthreads; i++) { - /* - * Join active threads, unregister interfaces and close - * file descriptors. 
- */ - pthread_join(targs[i].thread, NULL); - ioctl(targs[i].fd, NIOCUNREGIF, &targs[i].nmr); - close(targs[i].fd); - - if (targs[i].completed == 0) - continue; - - /* - * Collect threads output and extract information about - * how long it took to send all the packets. - */ - count += targs[i].count; - if (!timerisset(&tic) || timercmp(&targs[i].tic, &tic, <)) - tic = targs[i].tic; - if (!timerisset(&toc) || timercmp(&targs[i].toc, &toc, >)) - toc = targs[i].toc; + D("using pcap %p on %s", g.p, g.ifname); } - - /* print output. */ - timersub(&toc, &tic, &toc); - delta_t = toc.tv_sec + 1e-6* toc.tv_usec; - if (td_body == sender_body) - tx_output(count, g.pkt_size, delta_t); - else - rx_output(count, delta_t); - } - - if (g.use_pcap == 0) { - ioctl(fd, NIOCUNREGIF, &nmr); - munmap(mmap_addr, nmr.nr_memsize); - close(fd); - } - - return (0); +#endif // XXX + start_threads(&g); + main_thread(&g); + return 0; } + /* end of file */ -- 2.45.0