2 * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * Example program to show how to build a multithreaded packet
31 * source/sink using the netmap device.
33 * In this example we create a programmable number of threads
34 * to take care of all the queues of the interface used to
35 * send or receive traffic.
41 #include <ctype.h> // isprint()
43 const char *default_payload="netmap pkt-gen payload\n"
44 "http://info.iet.unipi.it/~luigi/netmap/ ";
46 int time_second; // support for RD() debugging macro
/* NOTE(review): non-contiguous excerpt -- the embedded original line numbers
 * jump, so the struct/enum bodies below are partially elided from this view. */
50 #define SKIP_PAYLOAD 1 /* do not check payload. */
/* Template packet (struct pkt, opening lines elided): ethernet header
 * followed by a fixed-size body buffer, packed to match the wire layout. */
53 struct ether_header eh;
56 uint8_t body[2048]; // XXX hardwired
57 } __attribute__((__packed__));
/* ip_range: sweep over IPv4 addresses [start..end] with cursor `cur`,
 * plus a port interval [port0..port1] with cursor `cur_p`. */
61 struct in_addr start, end, cur;
62 uint16_t port0, port1, cur_p;
/* mac_range: sweep over ethernet addresses. */
67 struct ether_addr start, end;
71 * global arguments for all threads
75 struct ip_range src_ip;
76 struct ip_range dst_ip;
77 struct mac_range dst_mac;
78 struct mac_range src_mac;
82 int npackets; /* total packets to send */
/* OPT_* bit flags stored in glob_arg.options (values 2/4/8 elided here). */
85 int options; /* testing */
86 #define OPT_PREFETCH 1
90 #define OPT_TS 16 /* add a timestamp */
91 #define OPT_INDIRECT 32 /* use indirect buffers, tx only */
92 #define OPT_DUMP 64 /* dump rx/tx traffic */
97 struct timespec tx_period;
/* thread body function selected at startup (sender/receiver/pinger/ponger) */
102 void *(*td_body)(void *);
107 enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };
111 * Arguments for a new thread. The same structure is used by
112 * the source and the sink
/* Per-thread state (struct targ, other fields elided): netmap interface,
 * the queue range this thread owns, a packet counter, and timing marks. */
121 struct netmap_if *nifp;
122 uint16_t qfirst, qlast; /* range of queues to scan */
123 volatile uint64_t count;
124 struct timespec tic, toc;
134 * extract the extremes from a range of ipv4 addresses.
135 * addr_lo[-addr_hi][:port_lo[-port_hi]]
/* NOTE(review): non-contiguous excerpt; several statements are elided. */
138 extract_ip_range(struct ip_range *r)
141 char buf1[16]; // one ip address
143 D("extract IP range from %s", r->name);
/* split off the optional ":port" part first */
144 p_lo = index(r->name, ':'); /* do we have ports ? */
146 D(" found ports at %s", p_lo);
148 p_hi = index(p_lo, '-');
153 r->port0 = strtol(p_lo, NULL, 0);
154 r->port1 = strtol(p_hi, NULL, 0);
/* normalization when the interval is given backwards (swap elided) */
155 if (r->port1 < r->port0) {
161 D("ports are %d to %d", r->port0, r->port1);
163 p_hi = index(r->name, '-'); /* do we have upper ip ? */
168 inet_aton(r->name, &r->start);
169 inet_aton(p_hi, &r->end);
/* NOTE(review): s_addr comparison is on network-byte-order values --
 * ordering is only meaningful on big-endian hosts; confirm intent. */
170 if (r->start.s_addr > r->end.s_addr) {
176 strncpy(buf1, inet_ntoa(r->end), sizeof(buf1));
177 D("range is %s %d to %s %d", inet_ntoa(r->start), r->port0,
/* parse "aa:bb:cc:dd:ee:ff" into start/end of the MAC sweep */
182 extract_mac_range(struct mac_range *r)
184 D("extract MAC range from %s", r->name);
185 bcopy(ether_aton(r->name), &r->start, 6);
186 bcopy(ether_aton(r->name), &r->end, 6);
/* NOTE(review): lines 188-197 reference `targ`/`eh` which are not in scope
 * in extract_mac_range; in upstream pkt-gen this region sits inside an
 * `#if 0` block -- the guard lines appear to be elided here. Confirm. */
188 bcopy(targ->src_mac, eh->ether_shost, 6);
189 p = index(targ->g->src_mac, '-');
191 targ->src_mac_range = atoi(p+1);
193 bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6);
194 bcopy(targ->dst_mac, eh->ether_dhost, 6);
195 p = index(targ->g->dst_mac, '-');
197 targ->dst_mac_range = atoi(p+1);
199 D("%s starts at %s", r->name, ether_ntoa(&r->start));
/* Global thread table and its size, shared with the SIGINT handler. */
202 static struct targ *targs;
203 static int global_nthreads;
205 /* control-C handler */
/* NOTE(review): handler body elided; visible lines iterate over all
 * threads (cancel loop, presumably) then restore the default handler. */
211 (void)sig; /* UNUSED */
212 for (i = 0; i < global_nthreads; i++) {
215 signal(SIGINT, SIG_DFL);
218 /* sysctl wrapper to return the number of active CPUs */
/* BSD path: hw.ncpu via sysctl; the Linux alternative is elided. */
229 sysctl(mib, 2, &ncpus, &len, NULL, 0);
234 #endif /* !__FreeBSD__ */
/* Linux compatibility shims: map the BSD link-level sockaddr names onto
 * their AF_PACKET equivalents so source_hwaddr() compiles unchanged. */
238 #define sockaddr_dl sockaddr_ll
239 #define sdl_family sll_family
240 #define AF_LINK AF_PACKET
/* NOTE(review): trailing ';' in this macro expands to an empty extra
 * statement at the call site (`mac = ...LLADDR(sdl);;`) -- harmless but
 * worth cleaning up when the full file is edited. */
241 #define LLADDR(s) s->sll_addr;
242 #include <linux/if_tun.h>
243 #define TAP_CLONEDEV "/dev/net/tun"
244 #endif /* __linux__ */
/* FreeBSD tap device */
247 #include <net/if_tun.h>
248 #define TAP_CLONEDEV "/dev/tap"
249 #endif /* __FreeBSD */
/* OSX: utun header included but tap support is doubtful (see warning) */
252 // #warning TAP not supported on apple ?
253 #include <net/if_utun.h>
254 #define TAP_CLONEDEV "/dev/tap"
255 #endif /* __APPLE__ */
259 * locate the src mac address for our interface, put it
260 * into the user-supplied buffer. return 0 if ok, -1 on error.
/* NOTE(review): non-contiguous excerpt; error-return and break lines elided. */
263 source_hwaddr(const char *ifname, char *buf)
265 struct ifaddrs *ifaphead, *ifap;
266 int l = sizeof(ifap->ifa_name);
268 if (getifaddrs(&ifaphead) != 0) {
269 D("getifaddrs %s failed", ifname);
/* walk the interface list looking for a link-level (AF_LINK) entry
 * whose name matches ifname */
273 for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) {
274 struct sockaddr_dl *sdl =
275 (struct sockaddr_dl *)ifap->ifa_addr;
278 if (!sdl || sdl->sdl_family != AF_LINK)
280 if (strncmp(ifap->ifa_name, ifname, l) != 0)
/* format the 6-byte hardware address as aa:bb:cc:dd:ee:ff into buf;
 * caller must supply at least 18 bytes (not checked here) */
282 mac = (uint8_t *)LLADDR(sdl);
283 sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x",
284 mac[0], mac[1], mac[2],
285 mac[3], mac[4], mac[5]);
287 D("source hwaddr %s", buf);
290 freeifaddrs(ifaphead);
295 /* set the thread affinity. */
/* Pin thread `me` to CPU `i`; returns nonzero on failure (per the caller's
 * `if (setaffinity(...))` checks). FreeBSD branch elided from this view. */
297 setaffinity(pthread_t me, int i)
305 /* Set the thread affinity. */
307 CPU_SET(i, &cpumask);
309 if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) {
310 D("Unable to set affinity");
/* fallback stub for platforms without affinity support */
314 (void)me; /* suppress 'unused' warnings */
316 #endif /* __FreeBSD__ */
320 /* Compute the checksum of the given ip header. */
/* Standard internet (one's-complement) checksum accumulator: sums the
 * buffer 16 bits at a time into `sum`; final fold is done by wrapsum(). */
322 checksum(const void *data, uint16_t len, uint32_t sum)
324 const uint8_t *addr = data;
327 /* Checksum all the pairs of bytes first... */
328 for (i = 0; i < (len & ~1U); i += 2) {
/* NOTE(review): cast of addr+i to u_int16_t* assumes the buffer is
 * 2-byte aligned; true for the packet fields it is called on here. */
329 sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i)));
334 * If there's a single byte left over, checksum it, too.
335 * Network byte order is big-endian, so the remaining byte is
/* wrapsum: fold the 32-bit accumulator and return the complemented
 * 16-bit checksum (body elided from this view). */
347 wrapsum(u_int32_t sum)
/* forward declaration so the prototype below uses a file-scope tag */
struct netmap_ring;

/* Check the payload of the packet for errors (use it for debug).
 * Hexdump helper: prints `len` bytes of `p`, 16 per row, hex on the
 * left (columns 7..54) and printable ASCII on the right (from column 55).
 * `ring` and `cur` are only echoed in the banner line for context.
 */
static void
dump_payload(char *p, int len, struct netmap_ring *ring, int cur)
{
	char buf[128];
	int i, j, i0;

	printf("ring %p cur %5d len %5d buf %p\n", (void *)ring, cur, len, (void *)p);
	/* hexdump routine */
	for (i = 0; i < len; ) {
		/* BUGFIX: arguments were swapped -- memset(buf, sizeof(buf), ' ')
		 * filled only 32 (' ') bytes with the value 128, leaving the rest
		 * of buf uninitialized before the printf("%s") below. */
		memset(buf, ' ', sizeof(buf));
		sprintf(buf, "%5d: ", i);
		i0 = i;	/* remember row start so the ASCII pass revisits the same bytes */
		for (j = 0; j < 16 && i < len; i++, j++)
			sprintf(buf + 7 + j * 3, "%02x ", (uint8_t)(p[i]));
		i = i0;
		for (j = 0; j < 16 && i < len; i++, j++)
			sprintf(buf + 7 + j + 48, "%c",
				/* cast avoids UB for negative char values */
				isprint((unsigned char)p[i]) ? p[i] : '.');
		printf("%s\n", buf);
	}
}
381 * Fill a packet with some payload.
382 * We create a UDP packet so the payload starts at
383 * 14+20+8 = 42 bytes.
/* Linux glibc names the udphdr fields source/dest; alias the BSD names. */
386 #define uh_sport source
387 #define uh_dport dest
/* NOTE(review): non-contiguous excerpt; declarations of ip/udp and some
 * header-field assignments are elided from this view. */
393 initialize_packet(struct targ *targ)
395 struct pkt *pkt = &targ->pkt;
396 struct ether_header *eh;
399 uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip);
/* indirect-buffer mode uses a filler payload instead of the default text */
400 const char *payload = targ->g->options & OPT_INDIRECT ?
401 "XXXXXXXXXXXXXXXXXXXXXX" : default_payload;
402 int i, l, l0 = strlen(payload);
/* tile the payload string across the body, then NUL-terminate */
404 for (i = 0; i < paylen;) {
405 l = min(l0, paylen - i);
406 bcopy(payload, pkt->body + i, l);
409 pkt->body[i-1] = '\0';
412 ip->ip_v = IPVERSION;
415 ip->ip_tos = IPTOS_LOWDELAY;
/* NOTE(review): ntohs used where htons is meant; identical on real
 * platforms (both byte-swap or both no-op) but htons reads better. */
416 ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh));
418 ip->ip_off = htons(IP_DF); /* Don't fragment */
419 ip->ip_ttl = IPDEFTTL;
420 ip->ip_p = IPPROTO_UDP;
/* pick the current address from the sweep, then advance the cursor,
 * wrapping back to start when it passes end */
421 ip->ip_dst.s_addr = targ->g->dst_ip.cur.s_addr;
422 if (++targ->g->dst_ip.cur.s_addr > targ->g->dst_ip.end.s_addr)
423 targ->g->dst_ip.cur.s_addr = targ->g->dst_ip.start.s_addr;
424 ip->ip_src.s_addr = targ->g->src_ip.cur.s_addr;
425 if (++targ->g->src_ip.cur.s_addr > targ->g->src_ip.end.s_addr)
426 targ->g->src_ip.cur.s_addr = targ->g->src_ip.start.s_addr;
427 ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0));
/* same pick-then-advance pattern for the UDP port sweep */
431 udp->uh_sport = htons(targ->g->src_ip.cur_p);
432 if (++targ->g->src_ip.cur_p > targ->g->src_ip.port1)
433 targ->g->src_ip.cur_p = targ->g->src_ip.port0;
434 udp->uh_dport = htons(targ->g->dst_ip.cur_p);
435 if (++targ->g->dst_ip.cur_p > targ->g->dst_ip.port1)
436 targ->g->dst_ip.cur_p = targ->g->dst_ip.port0;
437 udp->uh_ulen = htons(paylen);
438 /* Magic: taken from sbin/dhclient/packet.c */
/* UDP checksum over header + payload + pseudo-header (src/dst/proto/len) */
439 udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp),
441 paylen - sizeof(*udp),
442 checksum(&ip->ip_src, 2 * sizeof(ip->ip_src),
443 IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen)
/* finally the ethernet header: MACs from the configured ranges */
449 bcopy(&targ->g->src_mac.start, eh->ether_shost, 6);
450 bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6);
451 eh->ether_type = htons(ETHERTYPE_IP);
452 // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0);
458 * create and enqueue a batch of packets on a ring.
459 * On the last one set NS_REPORT to tell the driver to generate
460 * an interrupt when done.
/* NOTE(review): non-contiguous excerpt; the prefetch pass body and the
 * ring->cur/avail update at the end are elided from this view. */
463 send_packets(struct netmap_ring *ring, struct pkt *pkt,
464 int size, u_int count, int options)
466 u_int sent, cur = ring->cur;
/* clamp the batch to what the ring can currently hold */
468 if (ring->avail < count)
/* optional warm-up pass over the slots before the real fill below */
472 if (options & (OPT_COPY | OPT_PREFETCH) ) {
473 for (sent = 0; sent < count; sent++) {
474 struct netmap_slot *slot = &ring->slot[cur];
475 char *p = NETMAP_BUF(ring, slot->buf_idx);
478 cur = NETMAP_RING_NEXT(ring, cur);
/* main fill loop: one template packet per slot */
483 for (sent = 0; sent < count; sent++) {
484 struct netmap_slot *slot = &ring->slot[cur];
485 char *p = NETMAP_BUF(ring, slot->buf_idx);
488 if (options & OPT_DUMP)
489 dump_payload(p, size, ring, cur);
/* indirect mode: store a pointer to the template instead of copying */
490 if (options & OPT_INDIRECT) {
491 slot->flags |= NS_INDIRECT;
492 *((struct pkt **)(void *)p) = pkt;
493 } else if (options & OPT_COPY)
494 pkt_copy(pkt, p, size);
495 else if (options & OPT_MEMCPY)
496 memcpy(p, pkt, size);
497 else if (options & OPT_PREFETCH)
/* ask for an interrupt only on the last packet of the batch */
500 if (sent == count - 1)
501 slot->flags |= NS_REPORT;
502 cur = NETMAP_RING_NEXT(ring, cur);
511 * Send a packet, and wait for a response.
512 * The payload (after UDP header, ofs 42) has a 4-byte sequence
513 * followed by a struct timeval (or bintime?)
515 #define PAY_OFS 42 /* where in the pkt... */
/* NOTE(review): non-contiguous excerpt; declarations (p, seq), the error
 * path, and the per-second stats reset are elided from this view. */
518 pinger_body(void *data)
520 struct targ *targ = (struct targ *) data;
521 struct pollfd fds[1];
522 struct netmap_if *nifp = targ->nifp;
523 int i, rx = 0, n = targ->g->npackets;
525 fds[0].fd = targ->fd;
526 fds[0].events = (POLLIN);
527 static uint32_t sent;
528 struct timespec ts, now, last_print;
529 uint32_t count = 0, min = 1000000000, av = 0;
/* single-threaded only: the sequence numbers assume one sender */
531 if (targ->g->nthreads > 1) {
532 D("can only ping with 1 thread");
536 clock_gettime(CLOCK_REALTIME_PRECISE, &last_print);
537 while (n == 0 || (int)sent < n) {
538 struct netmap_ring *ring = NETMAP_TXRING(nifp, 0);
539 struct netmap_slot *slot;
/* build and queue one probe: seq at offset 42, timestamp at 46 */
541 for (i = 0; i < 1; i++) {
542 slot = &ring->slot[ring->cur];
543 slot->len = targ->g->pkt_size;
544 p = NETMAP_BUF(ring, slot->buf_idx);
546 if (ring->avail == 0) {
547 D("-- ouch, cannot send");
549 pkt_copy(&targ->pkt, p, targ->g->pkt_size);
550 clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
551 bcopy(&sent, p+42, sizeof(sent));
552 bcopy(&ts, p+46, sizeof(ts));
554 ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
558 /* should use a parameter to decide how often to send */
559 if (poll(fds, 1, 3000) <= 0) {
560 D("poll error/timeout on queue %d", targ->me);
563 /* see what we got back */
564 for (i = targ->qfirst; i < targ->qlast; i++) {
565 ring = NETMAP_RXRING(nifp, i);
566 while (ring->avail > 0) {
568 slot = &ring->slot[ring->cur];
569 p = NETMAP_BUF(ring, slot->buf_idx);
/* recover seq + send timestamp from the echoed payload and
 * compute the round-trip delta against `now` */
571 clock_gettime(CLOCK_REALTIME_PRECISE, &now);
572 bcopy(p+42, &seq, sizeof(seq));
573 bcopy(p+46, &ts, sizeof(ts));
574 ts.tv_sec = now.tv_sec - ts.tv_sec;
575 ts.tv_nsec = now.tv_nsec - ts.tv_nsec;
576 if (ts.tv_nsec < 0) {
577 ts.tv_nsec += 1000000000;
580 if (1) D("seq %d/%d delta %d.%09d", seq, sent,
581 (int)ts.tv_sec, (int)ts.tv_nsec);
/* track minimum latency (average accumulation elided) */
582 if (ts.tv_nsec < (int)min)
587 ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
591 //D("tx %d rx %d", sent, rx);
/* print count/min/avg once per second of elapsed wall time */
593 ts.tv_sec = now.tv_sec - last_print.tv_sec;
594 ts.tv_nsec = now.tv_nsec - last_print.tv_nsec;
595 if (ts.tv_nsec < 0) {
596 ts.tv_nsec += 1000000000;
599 if (ts.tv_sec >= 1) {
600 D("count %d min %d av %d",
601 count, min, av/count);
613 * reply to ping requests
/* NOTE(review): non-contiguous excerpt; the src/dst declarations, the
 * MAC-swap code (see XXX below), and loop-exit paths are elided. */
616 ponger_body(void *data)
618 struct targ *targ = (struct targ *) data;
619 struct pollfd fds[1];
620 struct netmap_if *nifp = targ->nifp;
621 struct netmap_ring *txring, *rxring;
622 int i, rx = 0, sent = 0, n = targ->g->npackets;
623 fds[0].fd = targ->fd;
624 fds[0].events = (POLLIN);
626 if (targ->g->nthreads > 1) {
627 D("can only reply ping with 1 thread");
630 D("understood ponger %d but don't know how to do it", n);
631 while (n == 0 || sent < n) {
632 uint32_t txcur, txavail;
/* pull in any pending rx before blocking in poll() */
635 ioctl(fds[0].fd, NIOCRXSYNC, NULL);
637 if (poll(fds, 1, 1000) <= 0) {
638 D("poll error/timeout on queue %d", targ->me);
/* all replies go out on tx ring 0 */
642 txring = NETMAP_TXRING(nifp, 0);
644 txavail = txring->avail;
645 /* see what we got back */
646 for (i = targ->qfirst; i < targ->qlast; i++) {
647 rxring = NETMAP_RXRING(nifp, i);
648 while (rxring->avail > 0) {
649 uint16_t *spkt, *dpkt;
650 uint32_t cur = rxring->cur;
651 struct netmap_slot *slot = &rxring->slot[cur];
653 src = NETMAP_BUF(rxring, slot->buf_idx);
654 //D("got pkt %p of size %d", src, slot->len);
655 rxring->cur = NETMAP_RING_NEXT(rxring, cur);
/* copy the request into the next free tx slot verbatim */
660 dst = NETMAP_BUF(txring,
661 txring->slot[txcur].buf_idx);
663 dpkt = (uint16_t *)dst;
664 spkt = (uint16_t *)src;
665 pkt_copy(src, dst, slot->len);
672 txring->slot[txcur].len = slot->len;
673 /* XXX swap src dst mac */
674 txcur = NETMAP_RING_NEXT(txring, txcur);
/* publish the consumed tx slots, then flush them to the wire */
680 txring->avail = txavail;
683 ioctl(fds[0].fd, NIOCTXSYNC, NULL);
685 //D("tx %d rx %d", sent, rx);
/* timespec_ge: return nonzero iff *a >= *b (returns elided from view). */
691 timespec_ge(const struct timespec *a, const struct timespec *b)
694 if (a->tv_sec > b->tv_sec)
696 if (a->tv_sec < b->tv_sec)
698 if (a->tv_nsec >= b->tv_nsec)
/* convert a struct timeval to a struct timespec (usec -> nsec) */
703 static __inline struct timespec
704 timeval2spec(const struct timeval *a)
706 struct timespec ts = {
708 .tv_nsec = a->tv_usec * 1000
/* convert a struct timespec to a struct timeval (nsec -> usec, truncating) */
713 static __inline struct timeval
714 timespec2val(const struct timespec *a)
716 struct timeval tv = {
718 .tv_usec = a->tv_nsec / 1000
/* Busy-wait until the absolute deadline `ts` passes; optionally report the
 * actual wakeup time and count spins in *waited. Spin body elided. */
725 wait_time(struct timespec ts, struct timespec *wakeup_ts, long long *waited)
727 struct timespec curtime;
732 if (clock_gettime(CLOCK_REALTIME_PRECISE, &curtime) == -1) {
733 D("clock_gettime: %s", strerror(errno));
736 while (timespec_ge(&ts, &curtime)) {
739 if (clock_gettime(CLOCK_REALTIME_PRECISE, &curtime) == -1) {
744 if (wakeup_ts != NULL)
745 *wakeup_ts = curtime;
/* in-place tsa += tsb with carry from nsec into sec */
750 timespec_add(struct timespec *tsa, struct timespec *tsb)
752 tsa->tv_sec += tsb->tv_sec;
753 tsa->tv_nsec += tsb->tv_nsec;
754 if (tsa->tv_nsec >= 1000000000) {
756 tsa->tv_nsec -= 1000000000;
/* Sender thread: transmits `npackets / nthreads` packets on this thread's
 * queue range, optionally rate-limited via tx_period.
 * NOTE(review): non-contiguous excerpt; `tosend` declaration, error paths,
 * and the sent-counter updates are elided from this view. */
762 sender_body(void *data)
764 struct targ *targ = (struct targ *) data;
766 struct pollfd fds[1];
767 struct netmap_if *nifp = targ->nifp;
768 struct netmap_ring *txring;
769 int i, n = targ->g->npackets / targ->g->nthreads, sent = 0;
/* start in copy mode; may be dropped below after a warm-up period */
770 int options = targ->g->options | OPT_COPY;
771 struct timespec tmptime, nexttime = { 0, 0}; // XXX silence compiler
772 int rate_limit = targ->g->tx_rate;
773 long long waited = 0;
776 if (setaffinity(targ->thread, targ->affinity))
778 /* setup poll(2) mechanism. */
779 memset(fds, 0, sizeof(fds));
780 fds[0].fd = targ->fd;
781 fds[0].events = (POLLOUT);
/* align the start time to a whole second for rate-limited runs */
784 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
788 timespec_add(&targ->tic, &tmptime);
789 targ->tic.tv_nsec = 0;
790 if (wait_time(targ->tic, NULL, NULL) == -1) {
791 D("wait_time: %s", strerror(errno));
794 nexttime = targ->tic;
/* pcap backend: inject packets one at a time */
796 if (targ->g->dev_type == DEV_PCAP) {
797 int size = targ->g->pkt_size;
798 void *pkt = &targ->pkt;
799 pcap_t *p = targ->g->p;
801 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
802 if (pcap_inject(p, pkt, size) != -1)
/* tap backend: plain write(2) to the tap fd */
809 } else if (targ->g->dev_type == DEV_TAP) { /* tap */
810 int size = targ->g->pkt_size;
811 void *pkt = &targ->pkt;
812 D("writing to file desc %d", targ->g->main_fd);
814 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
815 if (write(targ->g->main_fd, pkt, size) != -1)
/* netmap backend: main transmit loop */
824 while (!targ->cancel && (n == 0 || sent < n)) {
/* rate limiting: release one burst per tx_period */
826 if (rate_limit && tosend <= 0) {
827 tosend = targ->g->burst;
828 timespec_add(&nexttime, &targ->g->tx_period);
829 if (wait_time(nexttime, &tmptime, &waited) == -1) {
836 * wait for available room in the send queue(s)
838 if (poll(fds, 1, 2000) <= 0) {
841 D("poll error/timeout on queue %d", targ->me);
845 * scan our queues and send on those with room
/* after 100k packets, drop OPT_COPY if the user did not ask for it:
 * the NIC has surely consumed the first copies by now */
847 if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) {
849 options &= ~OPT_COPY;
851 for (i = targ->qfirst; i < targ->qlast; i++) {
852 int m, limit = rate_limit ? tosend : targ->g->burst;
853 if (n > 0 && n - sent < limit)
855 txring = NETMAP_TXRING(nifp, i);
856 if (txring->avail == 0)
858 m = send_packets(txring, &targ->pkt, targ->g->pkt_size,
865 /* flush any remaining packets */
866 ioctl(fds[0].fd, NIOCTXSYNC, NULL);
868 /* final part: wait all the TX queues to be empty. */
869 for (i = targ->qfirst; i < targ->qlast; i++) {
870 txring = NETMAP_TXRING(nifp, i);
871 while (!NETMAP_TX_RING_EMPTY(txring)) {
872 ioctl(fds[0].fd, NIOCTXSYNC, NULL);
873 usleep(1); /* wait 1 tick */
878 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
883 /* reset the ``used`` flag. */
883 /* reset the ``used`` flag. */
891 receive_pcap(u_char *user, const struct pcap_pkthdr * h,
892 const u_char * bytes)
894 int *count = (int *)user;
895 (void)h; /* UNUSED */
896 (void)bytes; /* UNUSED */
901 receive_packets(struct netmap_ring *ring, u_int limit, int dump)
906 if (ring->avail < limit)
908 for (rx = 0; rx < limit; rx++) {
909 struct netmap_slot *slot = &ring->slot[cur];
910 char *p = NETMAP_BUF(ring, slot->buf_idx);
912 slot->flags = OPT_INDIRECT; // XXX
914 dump_payload(p, slot->len, ring, cur);
916 cur = NETMAP_RING_NEXT(ring, cur);
/* Receiver thread: waits for traffic, then counts packets on this thread's
 * rx queues until 1 second of silence (unless g->forever is set).
 * NOTE(review): non-contiguous excerpt; some declarations and the exit
 * labels are elided from this view. */
925 receiver_body(void *data)
927 struct targ *targ = (struct targ *) data;
928 struct pollfd fds[1];
929 struct netmap_if *nifp = targ->nifp;
930 struct netmap_ring *rxring;
932 uint64_t received = 0;
934 if (setaffinity(targ->thread, targ->affinity))
937 /* setup poll(2) mechanism. */
938 memset(fds, 0, sizeof(fds));
939 fds[0].fd = targ->fd;
940 fds[0].events = (POLLIN);
942 /* unbounded wait for the first packet. */
944 i = poll(fds, 1, 1000);
945 if (i > 0 && !(fds[0].revents & POLLERR))
947 D("waiting for initial packets, poll returns %d %d", i, fds[0].revents);
950 /* main loop, exit after 1s silence */
951 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
/* pcap backend: let pcap_dispatch drive the callback */
952 if (targ->g->dev_type == DEV_PCAP) {
953 while (!targ->cancel) {
954 /* XXX should we poll ? */
955 pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL);
/* tap backend: blocking read(2) per packet */
957 } else if (targ->g->dev_type == DEV_TAP) {
958 D("reading from %s fd %d", targ->g->ifname, targ->g->main_fd);
959 while (!targ->cancel) {
961 /* XXX should we poll ? */
962 if (read(targ->g->main_fd, buf, sizeof(buf)) > 0)
/* netmap backend */
966 int dump = targ->g->options & OPT_DUMP;
967 while (!targ->cancel) {
968 /* Once we started to receive packets, wait at most 1 seconds
970 if (poll(fds, 1, 1 * 1000) <= 0 && !targ->g->forever) {
971 clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
972 targ->toc.tv_sec -= 1; /* Subtract timeout time. */
976 for (i = targ->qfirst; i < targ->qlast; i++) {
979 rxring = NETMAP_RXRING(nifp, i);
980 if (rxring->avail == 0)
983 m = receive_packets(rxring, targ->g->burst, dump);
/* expose the running total so main_thread can report progress */
986 targ->count = received;
988 // tell the card we have read the data
989 //ioctl(fds[0].fd, NIOCRXSYNC, NULL);
994 targ->count = received;
997 /* reset the ``used`` flag. */
1003 /* very crude code to print a number in normalized form.
1004 * Caller has to make sure that the buffer is large enough.
/* scale `val` down by 1000 per step, picking the matching K/M/G suffix
 * (the `val /= 1000` line is elided from this view); returns buf. */
1007 norm(char *buf, double val)
1009 char *units[] = { "", "K", "M", "G" };
1012 for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *); i++)
1014 sprintf(buf, "%.2f %s", val, units[i]);
/* report sender statistics: pps plus payload and on-wire bandwidth */
1019 tx_output(uint64_t sent, int size, double delta)
1021 double bw, raw_bw, pps;
1022 char b1[40], b2[80], b3[80];
1024 printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n",
1028 if (size < 60) /* correct for min packet size */
1031 bw = (8.0 * size * sent) / delta;
1032 /* raw packets have 4 bytes crc + 20 bytes framing */
1033 raw_bw = (8.0 * (size + 24) * sent) / delta;
1035 printf("Speed: %spps Bandwidth: %sbps (raw %sbps)\n",
1036 norm(b1, pps), norm(b2, bw), norm(b3, raw_bw) );
/* report receiver statistics: packets and pps only */
1041 rx_output(uint64_t received, double delta)
1046 printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta);
1050 pps = received / delta;
1051 printf("Speed: %spps\n", norm(b1, pps));
/* usage(): print the option summary and exit (fprintf call and exit
 * elided from this view; the strings below are the -h help text). */
1057 const char *cmd = "pkt-gen";
1061 "\t-i interface interface name\n"
1062 "\t-f function tx rx ping pong\n"
1063 "\t-n count number of iterations (can be 0)\n"
1064 "\t-t pkts_to_send also forces tx mode\n"
1065 "\t-r pkts_to_receive also forces rx mode\n"
1066 "\t-l pkts_size in bytes excluding CRC\n"
1067 "\t-d dst-ip end with %%n to sweep n addresses\n"
1068 "\t-s src-ip end with %%n to sweep n addresses\n"
1069 "\t-D dst-mac end with %%n to sweep n addresses\n"
1070 "\t-S src-mac end with %%n to sweep n addresses\n"
1071 "\t-a cpu_id use setaffinity\n"
1072 "\t-b burst size testing, mostly\n"
1073 "\t-c cores cores to use\n"
1074 "\t-p threads processes/threads to use\n"
1075 "\t-T report_ms milliseconds between reports\n"
1076 "\t-P use libpcap instead of netmap\n"
1077 "\t-w wait_for_link_time in seconds\n"
/* Create one thread per requested queue: each gets its own /dev/netmap fd,
 * registers the interface, and is pinned to a cpu if -a was given.
 * NOTE(review): non-contiguous excerpt; calloc-failure handling and the
 * non-netmap (pcap/tap) branch details are elided from this view. */
1085 start_threads(struct glob_arg *g)
1089 targs = calloc(g->nthreads, sizeof(*targs));
1091 * Now create the desired number of threads, each one
1092 * using a single descriptor.
1094 for (i = 0; i < g->nthreads; i++) {
1095 bzero(&targs[i], sizeof(targs[i]));
1096 targs[i].fd = -1; /* default, with pcap */
1099 if (g->dev_type == DEV_NETMAP) {
1100 struct nmreq tifreq;
1103 /* register interface. */
1104 tfd = open("/dev/netmap", O_RDWR);
1106 D("Unable to open /dev/netmap");
1111 bzero(&tifreq, sizeof(tifreq));
1112 strncpy(tifreq.nr_name, g->ifname, sizeof(tifreq.nr_name));
1113 tifreq.nr_version = NETMAP_API;
/* multi-thread: bind each fd to one hardware ring; single: all rings */
1114 tifreq.nr_ringid = (g->nthreads > 1) ? (i | NETMAP_HW_RING) : 0;
1117 * if we are acting as a receiver only, do not touch the transmit ring.
1118 * This is not the default because many apps may use the interface
1119 * in both directions, but a pure receiver does not.
1121 if (g->td_body == receiver_body) {
1122 tifreq.nr_ringid |= NETMAP_NO_TX_POLL;
1125 if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) {
1126 D("Unable to register %s", g->ifname);
1129 targs[i].nmr = tifreq;
1130 targs[i].nifp = NETMAP_IF(g->mmap_addr, tifreq.nr_offset);
1131 /* start threads. */
/* queue range: thread i owns ring i when multi-threaded, else all rings */
1132 targs[i].qfirst = (g->nthreads > 1) ? i : 0;
1133 targs[i].qlast = (g->nthreads > 1) ? i+1 :
1134 (g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings);
1136 targs[i].fd = g->main_fd;
/* cpu pinning: fixed cpu if within range, else round-robin, else off */
1140 if (g->affinity >= 0) {
1141 if (g->affinity < g->cpus)
1142 targs[i].affinity = g->affinity;
1144 targs[i].affinity = i % g->cpus;
1146 targs[i].affinity = -1;
1147 /* default, init packets */
1148 initialize_packet(&targs[i]);
1150 if (pthread_create(&targs[i].thread, NULL, g->td_body,
1152 D("Unable to create thread %d", i);
/* Main reporting loop: prints pps every report_interval ms until all
 * worker threads finish, then joins them and prints the final summary.
 * NOTE(review): non-contiguous excerpt; loop heads, `done`/`prev` updates
 * and some declarations are elided from this view. */
1159 main_thread(struct glob_arg *g)
1166 struct timeval tic, toc;
1168 gettimeofday(&toc, NULL);
1170 struct timeval now, delta;
1171 uint64_t pps, usec, my_count, npkts;
/* sleep for one report interval using select() as a portable timer */
1174 delta.tv_sec = g->report_interval/1000;
1175 delta.tv_usec = (g->report_interval%1000)*1000;
1176 select(0, NULL, NULL, NULL, &delta);
1177 gettimeofday(&now, NULL);
1178 time_second = now.tv_sec;
1179 timersub(&now, &toc, &toc);
/* aggregate packet counters across all threads */
1181 for (i = 0; i < g->nthreads; i++) {
1182 my_count += targs[i].count;
1183 if (targs[i].used == 0)
/* pps over the last interval, rounded to nearest */
1186 usec = toc.tv_sec* 1000000 + toc.tv_usec;
1189 npkts = my_count - prev;
1190 pps = (npkts*1000000 + usec/2) / usec;
1191 D("%" PRIu64 " pps (%" PRIu64 " pkts in %" PRIu64 " usec)",
1195 if (done == g->nthreads)
1201 for (i = 0; i < g->nthreads; i++) {
1202 struct timespec t_tic, t_toc;
1204 * Join active threads, unregister interfaces and close
1208 pthread_join(targs[i].thread, NULL);
1211 if (targs[i].completed == 0)
1212 D("ouch, thread %d exited with error", i);
1215 * Collect threads output and extract information about
1216 * how long it took to send all the packets.
1218 count += targs[i].count;
/* widen the [tic,toc] window to cover the earliest start / latest end */
1219 t_tic = timeval2spec(&tic);
1220 t_toc = timeval2spec(&toc);
1221 if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic))
1222 tic = timespec2val(&targs[i].tic);
1223 if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc))
1224 toc = timespec2val(&targs[i].toc);
1228 timersub(&toc, &tic, &toc);
1229 delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
1230 if (g->td_body == sender_body)
1231 tx_output(count, g->pkt_size, delta_t);
1233 rx_output(count, delta_t);
1235 if (g->dev_type == DEV_NETMAP) {
1236 munmap(g->mmap_addr, g->mmap_size);
/* table mapping the -f argument to the thread body function */
1247 static struct sf func[] = {
1248 { "tx", sender_body },
1249 { "rx", receiver_body },
1250 { "ping", pinger_body },
1251 { "pong", ponger_body },
/* Open (or create) a tap device and return its fd, writing the final
 * interface name back into `dev`. NOTE(review): non-contiguous excerpt;
 * error returns and the #ifdef structure are partially elided. */
1256 tap_alloc(char *dev)
1260 char *clonedev = TAP_CLONEDEV;
1264 /* Arguments taken by the function:
1266 * char *dev: the name of an interface (or '\0'). MUST have enough
1267 * space to hold the interface name if '\0' is passed
1268 * int flags: interface flags (eg, IFF_TUN etc.)
/* BSD-style path: open /dev/<name> directly when a full name was given */
1272 if (dev[3]) { /* tapSomething */
1273 static char buf[128];
1274 snprintf(buf, sizeof(buf), "/dev/%s", dev);
1278 /* open the device */
1279 if( (fd = open(clonedev, O_RDWR)) < 0 ) {
1282 D("%s open successful", clonedev);
1284 /* preparation of the struct ifr, of type "struct ifreq" */
1285 memset(&ifr, 0, sizeof(ifr));
1288 ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
1291 /* if a device name was specified, put it in the structure; otherwise,
1292 * the kernel will try to allocate the "next" device of the
1294 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
1297 /* try to create the device */
/* NOTE(review): typo in the log string ("to to" -> "to do"); left
 * unchanged here because it is runtime output, not a comment. */
1298 if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) {
1299 D("failed to to a TUNSETIFF");
1304 /* if the operation was successful, write back the name of the
1305 * interface to the variable "dev", so the caller can know
1306 * it. Note that the caller MUST reserve space in *dev (see calling
1308 strcpy(dev, ifr.ifr_name);
1309 D("new name is %s", dev);
1312 /* this is the special file descriptor that the caller will use to talk
1313 * with the virtual interface */
/* Entry point: parse options, open the backend (netmap / pcap / tap),
 * mmap and register the netmap interface, then hand off to
 * start_threads()/main_thread().
 * NOTE(review): non-contiguous excerpt; many case labels, braces and
 * error exits are elided from this view. */
1318 main(int arc, char **argv)
1327 int devqueues = 1; /* how many device queues */
1329 bzero(&g, sizeof(g));
/* defaults: receive mode, 1s reports, fixed addresses, 512-pkt bursts */
1332 g.td_body = receiver_body;
1333 g.report_interval = 1000; /* report interval */
1335 /* ip addresses can also be a range x.x.x.x-x.x.x.y */
1336 g.src_ip.name = "10.0.0.1";
1337 g.dst_ip.name = "10.1.0.1";
1338 g.dst_mac.name = "ff:ff:ff:ff:ff:ff";
1339 g.src_mac.name = NULL;
1341 g.burst = 512; // default
1347 while ( (ch = getopt(arc, argv,
1348 "a:f:n:i:It:r:l:d:s:D:S:b:c:o:p:PT:w:WvR:X")) != -1) {
1353 D("bad option %c %s", ch, optarg);
1358 g.npackets = atoi(optarg);
/* -f: look up the thread body by name in the func[] table */
1362 for (fn = func; fn->key; fn++) {
1363 if (!strcmp(fn->key, optarg))
1369 D("unrecognised function %s", optarg);
1372 case 'o': /* data generation options */
1373 g.options = atoi(optarg);
1376 case 'a': /* force affinity */
1377 g.affinity = atoi(optarg);
1380 case 'i': /* interface */
/* names starting with "tap" select the tap backend */
1382 if (!strncmp(optarg, "tap", 3))
1383 g.dev_type = DEV_TAP;
1385 g.dev_type = DEV_NETMAP;
1389 g.options |= OPT_INDIRECT; /* XXX use indirect buffer */
1392 case 't': /* send, deprecated */
1393 D("-t deprecated, please use -f tx -n %s", optarg);
1394 g.td_body = sender_body;
1395 g.npackets = atoi(optarg);
1398 case 'r': /* receive */
1399 D("-r deprecated, please use -f rx -n %s", optarg);
1400 g.td_body = receiver_body;
1401 g.npackets = atoi(optarg);
1404 case 'l': /* pkt_size */
1405 g.pkt_size = atoi(optarg);
1409 g.dst_ip.name = optarg;
1413 g.src_ip.name = optarg;
1416 case 'T': /* report interval */
1417 g.report_interval = atoi(optarg);
1421 wait_link = atoi(optarg);
1424 case 'W': /* XXX changed default */
1425 g.forever = 0; /* do not exit rx even with no traffic */
1428 case 'b': /* burst */
1429 g.burst = atoi(optarg);
1432 g.cpus = atoi(optarg);
1435 g.nthreads = atoi(optarg);
1439 g.dev_type = DEV_PCAP;
1442 case 'D': /* destination mac */
1443 g.dst_mac.name = optarg;
1446 case 'S': /* source mac */
1447 g.src_mac.name = optarg;
1453 g.tx_rate = atoi(optarg);
1456 g.options |= OPT_DUMP;
/* ---- argument validation ---- */
1460 if (g.ifname == NULL) {
1461 D("missing ifname");
1466 if (g.cpus < 0 || g.cpus > i) {
1467 D("%d cpus is too high, have only %d cpus", g.cpus, i);
1473 if (g.pkt_size < 16 || g.pkt_size > 1536) {
1474 D("bad pktsize %d\n", g.pkt_size);
/* no -S given: derive the source mac from the interface itself */
1478 if (g.src_mac.name == NULL) {
1479 static char mybuf[20] = "00:00:00:00:00:00";
1480 /* retrieve source mac address. */
1481 if (source_hwaddr(g.ifname, mybuf) == -1) {
1482 D("Unable to retrieve source mac");
1483 // continue, fail later
1485 g.src_mac.name = mybuf;
1487 /* extract address ranges */
1488 extract_ip_range(&g.src_ip);
1489 extract_ip_range(&g.dst_ip);
1490 extract_mac_range(&g.src_mac);
1491 extract_mac_range(&g.dst_mac);
/* ---- backend setup: tap, pcap, or netmap ---- */
1493 if (g.dev_type == DEV_TAP) {
1494 D("want to use tap %s", g.ifname);
1495 g.main_fd = tap_alloc(g.ifname);
1496 if (g.main_fd < 0) {
1497 D("cannot open tap %s", g.ifname);
1500 } else if (g.dev_type > DEV_NETMAP) {
1501 char pcap_errbuf[PCAP_ERRBUF_SIZE];
1503 D("using pcap on %s", g.ifname);
1504 pcap_errbuf[0] = '\0'; // init the buffer
1505 g.p = pcap_open_live(g.ifname, 0, 1, 100, pcap_errbuf);
1507 D("cannot open pcap on %s", g.ifname);
/* netmap: first an anonymous NIOCGINFO for the memory size... */
1511 bzero(&nmr, sizeof(nmr));
1512 nmr.nr_version = NETMAP_API;
1514 * Open the netmap device to fetch the number of queues of our
1517 * The first NIOCREGIF also detaches the card from the
1518 * protocol stack and may cause a reset of the card,
1519 * which in turn may take some time for the PHY to
1522 g.main_fd = open("/dev/netmap", O_RDWR);
1523 if (g.main_fd == -1) {
1524 D("Unable to open /dev/netmap");
1527 if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) {
1528 D("Unable to get if info without name");
1530 D("map size is %d Kb", nmr.nr_memsize >> 10);
/* ...then a per-interface NIOCGINFO for the queue counts */
1532 bzero(&nmr, sizeof(nmr));
1533 nmr.nr_version = NETMAP_API;
1534 strncpy(nmr.nr_name, g.ifname, sizeof(nmr.nr_name));
1535 if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) {
1536 D("Unable to get if info for %s", g.ifname);
1538 devqueues = nmr.nr_rx_rings;
1541 /* validate provided nthreads. */
1542 if (g.nthreads < 1 || g.nthreads > devqueues) {
1543 D("bad nthreads %d, have %d queues", g.nthreads, devqueues);
1544 // continue, fail later
1548 * Map the netmap shared memory: instead of issuing mmap()
1549 * inside the body of the threads, we prefer to keep this
1550 * operation here to simplify the thread logic.
1552 D("mapping %d Kbytes", nmr.nr_memsize>>10);
1553 g.mmap_size = nmr.nr_memsize;
1554 g.mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize,
1555 PROT_WRITE | PROT_READ,
1556 MAP_SHARED, g.main_fd, 0);
1557 if (g.mmap_addr == MAP_FAILED) {
1558 D("Unable to mmap %d KB", nmr.nr_memsize >> 10);
1559 // continue, fail later
1563 * Register the interface on the netmap device: from now on,
1564 * we can operate on the network interface without any
1565 * interference from the legacy network stack.
1567 * We decide to put the first interface registration here to
1568 * give time to cards that take a long time to reset the PHY.
1570 nmr.nr_version = NETMAP_API;
1571 if (ioctl(g.main_fd, NIOCREGIF, &nmr) == -1) {
1572 D("Unable to register interface %s", g.ifname);
1573 //continue, fail later
1577 /* Print some debug information. */
1579 "%s %s: %d queues, %d threads and %d cpus.\n",
1580 (g.td_body == sender_body) ? "Sending on" : "Receiving from",
1585 if (g.td_body == sender_body) {
1586 fprintf(stdout, "%s -> %s (%s -> %s)\n",
1587 g.src_ip.name, g.dst_ip.name,
1588 g.src_mac.name, g.dst_mac.name);
1591 /* Exit if something went wrong. */
1592 if (g.main_fd < 0) {
1599 D("--- SPECIAL OPTIONS:%s%s%s%s%s\n",
1600 g.options & OPT_PREFETCH ? " prefetch" : "",
1601 g.options & OPT_ACCESS ? " access" : "",
1602 g.options & OPT_MEMCPY ? " memcpy" : "",
1603 g.options & OPT_INDIRECT ? " indirect" : "",
1604 g.options & OPT_COPY ? " copy" : "");
/* derive the inter-burst period from the requested rate (-R) */
1607 if (g.tx_rate == 0) {
1608 g.tx_period.tv_sec = 0;
1609 g.tx_period.tv_nsec = 0;
1610 } else if (g.tx_rate == 1) {
1611 g.tx_period.tv_sec = 1;
1612 g.tx_period.tv_nsec = 0;
1614 g.tx_period.tv_sec = 0;
1615 g.tx_period.tv_nsec = (1e9 / g.tx_rate) * g.burst;
1616 if (g.tx_period.tv_nsec > 1000000000) {
1617 g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000;
1618 g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000;
/* NOTE(review): the printed values are seconds, but the runtime string
 * says "ns"; left unchanged because it is program output. */
1621 D("Sending %d packets every %d.%09d ns",
1622 g.burst, (int)g.tx_period.tv_sec, (int)g.tx_period.tv_nsec);
1623 /* Wait for PHY reset. */
1624 D("Wait %d secs for phy reset", wait_link);
1628 /* Install ^C handler. */
1629 global_nthreads = g.nthreads;
1630 signal(SIGINT, sigint_h);
1632 #if 0 // XXX this is not needed, i believe
1633 if (g.dev_type > DEV_NETMAP) {
1634 g.p = pcap_open_live(g.ifname, 0, 1, 100, NULL);
1636 D("cannot open pcap on %s", g.ifname);
1639 D("using pcap %p on %s", g.p, g.ifname);