2 * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * $Id: pkt-gen.c 12024 2013-01-25 05:41:51Z luigi $
30 * Example program to show how to build a multithreaded packet
31 * source/sink using the netmap device.
33 * In this example we create a programmable number of threads
34 * to take care of all the queues of the interface used to
35 * send or receive traffic.
41 const char *default_payload="netmap pkt-gen payload\n"
42 "http://info.iet.unipi.it/~luigi/netmap/ ";
44 int time_second; // support for RD() debugging macro
48 #define SKIP_PAYLOAD 1 /* do not check payload. */
51 struct ether_header eh;
54 uint8_t body[2048]; // XXX hardwired
55 } __attribute__((__packed__));
59 struct in_addr start, end, cur;
60 uint16_t port0, port1, cur_p;
65 struct ether_addr start, end;
69 * global arguments for all threads
73 struct ip_range src_ip;
74 struct ip_range dst_ip;
75 struct mac_range dst_mac;
76 struct mac_range src_mac;
80 int npackets; /* total packets to send */
83 int options; /* testing */
84 #define OPT_PREFETCH 1
88 #define OPT_TS 16 /* add a timestamp */
95 void *(*td_body)(void *);
100 enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP };
104 * Arguments for a new thread. The same structure is used by
105 * the source and the sink
114 struct netmap_if *nifp;
115 uint16_t qfirst, qlast; /* range of queues to scan */
116 volatile uint64_t count;
117 struct timeval tic, toc;
127 * extract the extremes from a range of ipv4 addresses.
128 * addr_lo[-addr_hi][:port_lo[-port_hi]]
131 extract_ip_range(struct ip_range *r)
134 char buf1[16]; // one ip address
136 D("extract IP range from %s", r->name);
137 p_lo = index(r->name, ':'); /* do we have ports ? */
139 D(" found ports at %s", p_lo);
141 p_hi = index(p_lo, '-');
146 r->port0 = strtol(p_lo, NULL, 0);
147 r->port1 = strtol(p_hi, NULL, 0);
148 if (r->port1 < r->port0) {
154 D("ports are %d to %d", r->port0, r->port1);
156 p_hi = index(r->name, '-'); /* do we have upper ip ? */
161 inet_aton(r->name, &r->start);
162 inet_aton(p_hi, &r->end);
163 if (r->start.s_addr > r->end.s_addr) {
169 strncpy(buf1, inet_ntoa(r->end), sizeof(buf1));
170 D("range is %s %d to %s %d", inet_ntoa(r->start), r->port0,
175 extract_mac_range(struct mac_range *r)
177 D("extract MAC range from %s", r->name);
178 bcopy(ether_aton(r->name), &r->start, 6);
179 bcopy(ether_aton(r->name), &r->end, 6);
181 bcopy(targ->src_mac, eh->ether_shost, 6);
182 p = index(targ->g->src_mac, '-');
184 targ->src_mac_range = atoi(p+1);
186 bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6);
187 bcopy(targ->dst_mac, eh->ether_dhost, 6);
188 p = index(targ->g->dst_mac, '-');
190 targ->dst_mac_range = atoi(p+1);
192 D("%s starts at %s", r->name, ether_ntoa(&r->start));
195 static struct targ *targs;
196 static int global_nthreads;
198 /* control-C handler */
204 (void)sig; /* UNUSED */
205 for (i = 0; i < global_nthreads; i++) {
208 signal(SIGINT, SIG_DFL);
211 /* sysctl wrapper to return the number of active CPUs */
222 sysctl(mib, 2, &ncpus, &len, NULL, 0);
227 #endif /* !__FreeBSD__ */
231 #define sockaddr_dl sockaddr_ll
232 #define sdl_family sll_family
233 #define AF_LINK AF_PACKET
234 #define LLADDR(s) s->sll_addr;
235 #include <linux/if_tun.h>
236 #define TAP_CLONEDEV "/dev/net/tun"
237 #endif /* __linux__ */
240 #include <net/if_tun.h>
241 #define TAP_CLONEDEV "/dev/tap"
242 #endif /* __FreeBSD__ */
245 // #warning TAP not supported on apple ?
246 #include <net/if_utun.h>
247 #define TAP_CLONEDEV "/dev/tap"
248 #endif /* __APPLE__ */
252 * locate the src mac address for our interface, put it
253 * into the user-supplied buffer. return 0 if ok, -1 on error.
256 source_hwaddr(const char *ifname, char *buf)
258 struct ifaddrs *ifaphead, *ifap;
259 int l = sizeof(ifap->ifa_name);
261 if (getifaddrs(&ifaphead) != 0) {
262 D("getifaddrs %s failed", ifname);
266 for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) {
267 struct sockaddr_dl *sdl =
268 (struct sockaddr_dl *)ifap->ifa_addr;
271 if (!sdl || sdl->sdl_family != AF_LINK)
273 if (strncmp(ifap->ifa_name, ifname, l) != 0)
275 mac = (uint8_t *)LLADDR(sdl);
276 sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x",
277 mac[0], mac[1], mac[2],
278 mac[3], mac[4], mac[5]);
280 D("source hwaddr %s", buf);
283 freeifaddrs(ifaphead);
288 /* set the thread affinity. */
290 setaffinity(pthread_t me, int i)
298 /* Set thread affinity. */
300 CPU_SET(i, &cpumask);
302 if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) {
303 D("Unable to set affinity");
307 (void)me; /* suppress 'unused' warnings */
309 #endif /* __FreeBSD__ */
313 /* Accumulate into sum the 16-bit words of the len bytes at data (used for both the IP and UDP checksums). */
315 checksum(const void *data, uint16_t len, uint32_t sum)
317 const uint8_t *addr = data;
320 /* Checksum all the pairs of bytes first... */
321 for (i = 0; i < (len & ~1U); i += 2) {
322 sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i)));
327 * If there's a single byte left over, checksum it, too.
328 * Network byte order is big-endian, so the remaining byte is
340 wrapsum(u_int32_t sum)
347 * Fill a packet with some payload.
348 * We create a UDP packet so the payload starts at
349 * 14+20+8 = 42 bytes.
352 #define uh_sport source
353 #define uh_dport dest
358 initialize_packet(struct targ *targ)
360 struct pkt *pkt = &targ->pkt;
361 struct ether_header *eh;
364 uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip);
365 int i, l, l0 = strlen(default_payload);
367 for (i = 0; i < paylen;) {
368 l = min(l0, paylen - i);
369 bcopy(default_payload, pkt->body + i, l);
372 pkt->body[i-1] = '\0';
375 ip->ip_v = IPVERSION;
378 ip->ip_tos = IPTOS_LOWDELAY;
379 ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh));
381 ip->ip_off = htons(IP_DF); /* Don't fragment */
382 ip->ip_ttl = IPDEFTTL;
383 ip->ip_p = IPPROTO_UDP;
384 ip->ip_dst.s_addr = targ->g->dst_ip.cur.s_addr;
385 if (++targ->g->dst_ip.cur.s_addr > targ->g->dst_ip.end.s_addr)
386 targ->g->dst_ip.cur.s_addr = targ->g->dst_ip.start.s_addr;
387 ip->ip_src.s_addr = targ->g->src_ip.cur.s_addr;
388 if (++targ->g->src_ip.cur.s_addr > targ->g->src_ip.end.s_addr)
389 targ->g->src_ip.cur.s_addr = targ->g->src_ip.start.s_addr;
390 ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0));
394 udp->uh_sport = htons(targ->g->src_ip.cur_p);
395 if (++targ->g->src_ip.cur_p > targ->g->src_ip.port1)
396 targ->g->src_ip.cur_p = targ->g->src_ip.port0;
397 udp->uh_dport = htons(targ->g->dst_ip.cur_p);
398 if (++targ->g->dst_ip.cur_p > targ->g->dst_ip.port1)
399 targ->g->dst_ip.cur_p = targ->g->dst_ip.port0;
400 udp->uh_ulen = htons(paylen);
401 /* Magic: taken from sbin/dhclient/packet.c */
402 udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp),
404 paylen - sizeof(*udp),
405 checksum(&ip->ip_src, 2 * sizeof(ip->ip_src),
406 IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen)
412 bcopy(&targ->g->src_mac.start, eh->ether_shost, 6);
413 bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6);
414 eh->ether_type = htons(ETHERTYPE_IP);
417 /* Check the payload of the packet for errors (use it for debug).
418 * Look for consecutive ascii representations of the size of the packet.
421 check_payload(char *p, int psize)
424 int n_read, size, sizelen;
426 /* get the length in ASCII of the length of the packet. */
427 sizelen = sprintf(temp, "%d", psize) + 1; // include a whitespace
430 p += 14; /* skip packet header. */
432 while (psize - n_read >= sizelen) {
433 sscanf(p, "%d", &size);
435 D("Read %d instead of %d", size, psize);
446 * create and enqueue a batch of packets on a ring.
447 * On the last one set NS_REPORT to tell the driver to generate
448 * an interrupt when done.
451 send_packets(struct netmap_ring *ring, struct pkt *pkt,
452 int size, u_int count, int options)
454 u_int sent, cur = ring->cur;
456 if (ring->avail < count)
460 if (options & (OPT_COPY | OPT_PREFETCH) ) {
461 for (sent = 0; sent < count; sent++) {
462 struct netmap_slot *slot = &ring->slot[cur];
463 char *p = NETMAP_BUF(ring, slot->buf_idx);
466 cur = NETMAP_RING_NEXT(ring, cur);
471 for (sent = 0; sent < count; sent++) {
472 struct netmap_slot *slot = &ring->slot[cur];
473 char *p = NETMAP_BUF(ring, slot->buf_idx);
475 if (options & OPT_COPY)
476 pkt_copy(pkt, p, size);
477 else if (options & OPT_MEMCPY)
478 memcpy(p, pkt, size);
479 else if (options & OPT_PREFETCH)
482 if (sent == count - 1)
483 slot->flags |= NS_REPORT;
484 cur = NETMAP_RING_NEXT(ring, cur);
493 * Send a packet, and wait for a response.
494 * The payload (after UDP header, ofs 42) has a 4-byte sequence
495 * followed by a struct timespec (the send timestamp, copied at offset 46)
497 #define PAY_OFS 42 /* where in the pkt... */
500 pinger_body(void *data)
502 struct targ *targ = (struct targ *) data;
503 struct pollfd fds[1];
504 struct netmap_if *nifp = targ->nifp;
505 int i, rx = 0, n = targ->g->npackets;
507 fds[0].fd = targ->fd;
508 fds[0].events = (POLLIN);
509 static uint32_t sent;
510 struct timespec ts, now, last_print;
511 uint32_t count = 0, min = 1000000000, av = 0;
513 if (targ->g->nthreads > 1) {
514 D("can only ping with 1 thread");
518 clock_gettime(CLOCK_REALTIME_PRECISE, &last_print);
519 while (n == 0 || (int)sent < n) {
520 struct netmap_ring *ring = NETMAP_TXRING(nifp, 0);
521 struct netmap_slot *slot;
523 for (i = 0; i < 1; i++) {
524 slot = &ring->slot[ring->cur];
525 slot->len = targ->g->pkt_size;
526 p = NETMAP_BUF(ring, slot->buf_idx);
528 if (ring->avail == 0) {
529 D("-- ouch, cannot send");
531 pkt_copy(&targ->pkt, p, targ->g->pkt_size);
532 clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
533 bcopy(&sent, p+42, sizeof(sent));
534 bcopy(&ts, p+46, sizeof(ts));
536 ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
540 /* should use a parameter to decide how often to send */
541 if (poll(fds, 1, 3000) <= 0) {
542 D("poll error/timeout on queue %d", targ->me);
545 /* see what we got back */
546 for (i = targ->qfirst; i < targ->qlast; i++) {
547 ring = NETMAP_RXRING(nifp, i);
548 while (ring->avail > 0) {
550 slot = &ring->slot[ring->cur];
551 p = NETMAP_BUF(ring, slot->buf_idx);
553 clock_gettime(CLOCK_REALTIME_PRECISE, &now);
554 bcopy(p+42, &seq, sizeof(seq));
555 bcopy(p+46, &ts, sizeof(ts));
556 ts.tv_sec = now.tv_sec - ts.tv_sec;
557 ts.tv_nsec = now.tv_nsec - ts.tv_nsec;
558 if (ts.tv_nsec < 0) {
559 ts.tv_nsec += 1000000000;
562 if (1) D("seq %d/%d delta %d.%09d", seq, sent,
563 (int)ts.tv_sec, (int)ts.tv_nsec);
564 if (ts.tv_nsec < (int)min)
569 ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
573 //D("tx %d rx %d", sent, rx);
575 ts.tv_sec = now.tv_sec - last_print.tv_sec;
576 ts.tv_nsec = now.tv_nsec - last_print.tv_nsec;
577 if (ts.tv_nsec < 0) {
578 ts.tv_nsec += 1000000000;
581 if (ts.tv_sec >= 1) {
582 D("count %d min %d av %d",
583 count, min, av/count);
595 * reply to ping requests
598 ponger_body(void *data)
600 struct targ *targ = (struct targ *) data;
601 struct pollfd fds[1];
602 struct netmap_if *nifp = targ->nifp;
603 struct netmap_ring *txring, *rxring;
604 int i, rx = 0, sent = 0, n = targ->g->npackets;
605 fds[0].fd = targ->fd;
606 fds[0].events = (POLLIN);
608 if (targ->g->nthreads > 1) {
609 D("can only reply ping with 1 thread");
612 D("understood ponger %d but don't know how to do it", n);
613 while (n == 0 || sent < n) {
614 uint32_t txcur, txavail;
617 ioctl(fds[0].fd, NIOCRXSYNC, NULL);
619 if (poll(fds, 1, 1000) <= 0) {
620 D("poll error/timeout on queue %d", targ->me);
624 txring = NETMAP_TXRING(nifp, 0);
626 txavail = txring->avail;
627 /* see what we got back */
628 for (i = targ->qfirst; i < targ->qlast; i++) {
629 rxring = NETMAP_RXRING(nifp, i);
630 while (rxring->avail > 0) {
631 uint16_t *spkt, *dpkt;
632 uint32_t cur = rxring->cur;
633 struct netmap_slot *slot = &rxring->slot[cur];
635 src = NETMAP_BUF(rxring, slot->buf_idx);
636 //D("got pkt %p of size %d", src, slot->len);
638 rxring->cur = NETMAP_RING_NEXT(rxring, cur);
642 dst = NETMAP_BUF(txring,
643 txring->slot[txcur].buf_idx);
645 dpkt = (uint16_t *)dst;
646 spkt = (uint16_t *)src;
647 pkt_copy(src, dst, slot->len);
654 txring->slot[txcur].len = slot->len;
655 /* XXX swap src dst mac */
656 txcur = NETMAP_RING_NEXT(txring, txcur);
662 txring->avail = txavail;
665 ioctl(fds[0].fd, NIOCTXSYNC, NULL);
667 //D("tx %d rx %d", sent, rx);
674 sender_body(void *data)
676 struct targ *targ = (struct targ *) data;
678 struct pollfd fds[1];
679 struct netmap_if *nifp = targ->nifp;
680 struct netmap_ring *txring;
681 int i, n = targ->g->npackets / targ->g->nthreads, sent = 0;
682 int options = targ->g->options | OPT_COPY;
684 if (setaffinity(targ->thread, targ->affinity))
686 /* setup poll(2) mechanism. */
687 memset(fds, 0, sizeof(fds));
688 fds[0].fd = targ->fd;
689 fds[0].events = (POLLOUT);
692 gettimeofday(&targ->tic, NULL);
694 if (targ->g->dev_type == DEV_PCAP) {
695 int size = targ->g->pkt_size;
696 void *pkt = &targ->pkt;
697 pcap_t *p = targ->g->p;
699 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
700 if (pcap_inject(p, pkt, size) != -1)
707 } else if (targ->g->dev_type == DEV_TAP) { /* tap */
708 int size = targ->g->pkt_size;
709 void *pkt = &targ->pkt;
710 D("writing to file desc %d", targ->g->main_fd);
712 for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) {
713 if (write(targ->g->main_fd, pkt, size) != -1)
721 while (!targ->cancel && (n == 0 || sent < n)) {
724 * wait for available room in the send queue(s)
726 if (poll(fds, 1, 2000) <= 0) {
729 D("poll error/timeout on queue %d", targ->me);
733 * scan our queues and send on those with room
735 if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) {
737 options &= ~OPT_COPY;
739 for (i = targ->qfirst; i < targ->qlast; i++) {
740 int m, limit = targ->g->burst;
741 if (n > 0 && n - sent < limit)
743 txring = NETMAP_TXRING(nifp, i);
744 if (txring->avail == 0)
746 m = send_packets(txring, &targ->pkt, targ->g->pkt_size,
752 /* flush any remaining packets */
753 ioctl(fds[0].fd, NIOCTXSYNC, NULL);
755 /* final part: wait for all the TX queues to be empty. */
756 for (i = targ->qfirst; i < targ->qlast; i++) {
757 txring = NETMAP_TXRING(nifp, i);
758 while (!NETMAP_TX_RING_EMPTY(txring)) {
759 ioctl(fds[0].fd, NIOCTXSYNC, NULL);
760 usleep(1); /* wait 1 tick */
765 gettimeofday(&targ->toc, NULL);
770 /* reset the ``used`` flag. */
778 receive_pcap(u_char *user, const struct pcap_pkthdr * h,
779 const u_char * bytes)
781 int *count = (int *)user;
782 (void)h; /* UNUSED */
783 (void)bytes; /* UNUSED */
788 receive_packets(struct netmap_ring *ring, u_int limit, int skip_payload)
793 if (ring->avail < limit)
795 for (rx = 0; rx < limit; rx++) {
796 struct netmap_slot *slot = &ring->slot[cur];
797 char *p = NETMAP_BUF(ring, slot->buf_idx);
800 check_payload(p, slot->len);
802 cur = NETMAP_RING_NEXT(ring, cur);
811 receiver_body(void *data)
813 struct targ *targ = (struct targ *) data;
814 struct pollfd fds[1];
815 struct netmap_if *nifp = targ->nifp;
816 struct netmap_ring *rxring;
818 uint64_t received = 0;
820 if (setaffinity(targ->thread, targ->affinity))
823 /* setup poll(2) mechanism. */
824 memset(fds, 0, sizeof(fds));
825 fds[0].fd = targ->fd;
826 fds[0].events = (POLLIN);
828 /* unbounded wait for the first packet. */
830 i = poll(fds, 1, 1000);
831 if (i > 0 && !(fds[0].revents & POLLERR))
833 D("waiting for initial packets, poll returns %d %d", i, fds[0].revents);
836 /* main loop, exit after 1s silence */
837 gettimeofday(&targ->tic, NULL);
838 if (targ->g->dev_type == DEV_PCAP) {
839 while (!targ->cancel) {
840 /* XXX should we poll ? */
841 pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL);
843 } else if (targ->g->dev_type == DEV_TAP) {
844 D("reading from %s fd %d", targ->g->ifname, targ->g->main_fd);
845 while (!targ->cancel) {
847 /* XXX should we poll ? */
848 if (read(targ->g->main_fd, buf, sizeof(buf)) > 0)
852 while (!targ->cancel) {
853 /* Once we started to receive packets, wait at most 1 second
855 if (poll(fds, 1, 1 * 1000) <= 0 && targ->g->forever == 0) {
856 gettimeofday(&targ->toc, NULL);
857 targ->toc.tv_sec -= 1; /* Subtract timeout time. */
861 for (i = targ->qfirst; i < targ->qlast; i++) {
864 rxring = NETMAP_RXRING(nifp, i);
865 if (rxring->avail == 0)
868 m = receive_packets(rxring, targ->g->burst,
872 targ->count = received;
874 // tell the card we have read the data
875 //ioctl(fds[0].fd, NIOCRXSYNC, NULL);
880 targ->count = received;
883 /* reset the ``used`` flag. */
889 /* very crude code to print a number in normalized form.
890 * Caller has to make sure that the buffer is large enough.
893 norm(char *buf, double val)
895 char *units[] = { "", "K", "M", "G" };
898 for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *); i++)
900 sprintf(buf, "%.2f %s", val, units[i]);
905 tx_output(uint64_t sent, int size, double delta)
907 double bw, raw_bw, pps;
908 char b1[40], b2[80], b3[80];
910 printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n",
914 if (size < 60) /* correct for min packet size */
917 bw = (8.0 * size * sent) / delta;
918 /* raw packets have 4 bytes crc + 20 bytes framing */
919 raw_bw = (8.0 * (size + 24) * sent) / delta;
921 printf("Speed: %spps Bandwidth: %sbps (raw %sbps)\n",
922 norm(b1, pps), norm(b2, bw), norm(b3, raw_bw) );
927 rx_output(uint64_t received, double delta)
932 printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta);
936 pps = received / delta;
937 printf("Speed: %spps\n", norm(b1, pps));
943 const char *cmd = "pkt-gen";
947 "\t-i interface interface name\n"
948 "\t-f function tx rx ping pong\n"
949 "\t-n count number of iterations (can be 0)\n"
950 "\t-t pkts_to_send also forces tx mode\n"
951 "\t-r pkts_to_receive also forces rx mode\n"
952 "\t-l pkts_size in bytes excluding CRC\n"
953 "\t-d dst-ip end with %%n to sweep n addresses\n"
954 "\t-s src-ip end with %%n to sweep n addresses\n"
955 "\t-D dst-mac end with %%n to sweep n addresses\n"
956 "\t-S src-mac end with %%n to sweep n addresses\n"
957 "\t-a cpu_id use setaffinity\n"
958 "\t-b burst size testing, mostly\n"
959 "\t-c cores cores to use\n"
960 "\t-p threads processes/threads to use\n"
961 "\t-T report_ms milliseconds between reports\n"
962 "\t-P use libpcap instead of netmap\n"
963 "\t-w wait_for_link_time in seconds\n"
971 start_threads(struct glob_arg *g)
975 targs = calloc(g->nthreads, sizeof(*targs));
977 * Now create the desired number of threads, each one
978 * using a single descriptor.
980 for (i = 0; i < g->nthreads; i++) {
981 bzero(&targs[i], sizeof(targs[i]));
982 targs[i].fd = -1; /* default, with pcap */
985 if (g->dev_type == DEV_NETMAP) {
989 /* register interface. */
990 tfd = open("/dev/netmap", O_RDWR);
992 D("Unable to open /dev/netmap");
997 bzero(&tifreq, sizeof(tifreq));
998 strncpy(tifreq.nr_name, g->ifname, sizeof(tifreq.nr_name));
999 tifreq.nr_version = NETMAP_API;
1000 tifreq.nr_ringid = (g->nthreads > 1) ? (i | NETMAP_HW_RING) : 0;
1003 * if we are acting as a receiver only, do not touch the transmit ring.
1004 * This is not the default because many apps may use the interface
1005 * in both directions, but a pure receiver does not.
1007 if (g->td_body == receiver_body) {
1008 tifreq.nr_ringid |= NETMAP_NO_TX_POLL;
1011 if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) {
1012 D("Unable to register %s", g->ifname);
1015 targs[i].nmr = tifreq;
1016 targs[i].nifp = NETMAP_IF(g->mmap_addr, tifreq.nr_offset);
1017 /* start threads. */
1018 targs[i].qfirst = (g->nthreads > 1) ? i : 0;
1019 targs[i].qlast = (g->nthreads > 1) ? i+1 :
1020 (g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings);
1022 targs[i].fd = g->main_fd;
1026 if (g->affinity >= 0) {
1027 if (g->affinity < g->cpus)
1028 targs[i].affinity = g->affinity;
1030 targs[i].affinity = i % g->cpus;
1032 targs[i].affinity = -1;
1033 /* default, init packets */
1034 initialize_packet(&targs[i]);
1036 if (pthread_create(&targs[i].thread, NULL, g->td_body,
1038 D("Unable to create thread %d", i);
1045 main_thread(struct glob_arg *g)
1052 struct timeval tic, toc;
1054 gettimeofday(&toc, NULL);
1056 struct timeval now, delta;
1057 uint64_t pps, usec, my_count, npkts;
1060 delta.tv_sec = g->report_interval/1000;
1061 delta.tv_usec = (g->report_interval%1000)*1000;
1062 select(0, NULL, NULL, NULL, &delta);
1063 gettimeofday(&now, NULL);
1064 time_second = now.tv_sec;
1065 timersub(&now, &toc, &toc);
1067 for (i = 0; i < g->nthreads; i++) {
1068 my_count += targs[i].count;
1069 if (targs[i].used == 0)
1072 usec = toc.tv_sec* 1000000 + toc.tv_usec;
1075 npkts = my_count - prev;
1076 pps = (npkts*1000000 + usec/2) / usec;
1077 D("%" PRIu64 " pps (%" PRIu64 " pkts in %" PRIu64 " usec)",
1081 if (done == g->nthreads)
1087 for (i = 0; i < g->nthreads; i++) {
1089 * Join active threads, unregister interfaces and close
1092 pthread_join(targs[i].thread, NULL);
1095 if (targs[i].completed == 0)
1096 D("ouch, thread %d exited with error", i);
1099 * Collect threads output and extract information about
1100 * how long it took to send all the packets.
1102 count += targs[i].count;
1103 if (!timerisset(&tic) || timercmp(&targs[i].tic, &tic, <))
1105 if (!timerisset(&toc) || timercmp(&targs[i].toc, &toc, >))
1110 timersub(&toc, &tic, &toc);
1111 delta_t = toc.tv_sec + 1e-6* toc.tv_usec;
1112 if (g->td_body == sender_body)
1113 tx_output(count, g->pkt_size, delta_t);
1115 rx_output(count, delta_t);
1117 if (g->dev_type == DEV_NETMAP) {
1118 ioctl(g->main_fd, NIOCUNREGIF, NULL); // XXX deprecated
1119 munmap(g->mmap_addr, g->mmap_size);
1130 static struct sf func[] = {
1131 { "tx", sender_body },
1132 { "rx", receiver_body },
1133 { "ping", pinger_body },
1134 { "pong", ponger_body },
1139 tap_alloc(char *dev)
1143 char *clonedev = TAP_CLONEDEV;
1147 /* Arguments taken by the function:
1149 * char *dev: the name of an interface (or '\0'). MUST have enough
1150 * space to hold the interface name if '\0' is passed
1151 * int flags: interface flags (eg, IFF_TUN etc.) -- not a parameter of this version; the flags are hardwired to IFF_TAP | IFF_NO_PI below
1155 if (dev[3]) { /* tapSomething */
1156 static char buf[128];
1157 snprintf(buf, sizeof(buf), "/dev/%s", dev);
1161 /* open the device */
1162 if( (fd = open(clonedev, O_RDWR)) < 0 ) {
1165 D("%s open successful", clonedev);
1167 /* preparation of the struct ifr, of type "struct ifreq" */
1168 memset(&ifr, 0, sizeof(ifr));
1171 ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
1174 /* if a device name was specified, put it in the structure; otherwise,
1175 * the kernel will try to allocate the "next" device of the
1177 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
1180 /* try to create the device */
1181 if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) {
1182 D("failed to to a TUNSETIFF");
1187 /* if the operation was successful, write back the name of the
1188 * interface to the variable "dev", so the caller can know
1189 * it. Note that the caller MUST reserve space in *dev (see calling
1191 strcpy(dev, ifr.ifr_name);
1192 D("new name is %s", dev);
1195 /* this is the special file descriptor that the caller will use to talk
1196 * with the virtual interface */
1201 main(int arc, char **argv)
1210 int devqueues = 1; /* how many device queues */
1212 bzero(&g, sizeof(g));
1215 g.td_body = receiver_body;
1216 g.report_interval = 1000; /* report interval */
1218 /* ip addresses can also be a range x.x.x.x-x.x.x.y */
1219 g.src_ip.name = "10.0.0.1";
1220 g.dst_ip.name = "10.1.0.1";
1221 g.dst_mac.name = "ff:ff:ff:ff:ff:ff";
1222 g.src_mac.name = NULL;
1224 g.burst = 512; // default
1228 while ( (ch = getopt(arc, argv,
1229 "a:f:n:i:t:r:l:d:s:D:S:b:c:o:p:PT:w:Wv")) != -1) {
1234 D("bad option %c %s", ch, optarg);
1239 g.npackets = atoi(optarg);
1243 for (fn = func; fn->key; fn++) {
1244 if (!strcmp(fn->key, optarg))
1250 D("unrecognised function %s", optarg);
1253 case 'o': /* data generation options */
1254 g.options = atoi(optarg);
1257 case 'a': /* force affinity */
1258 g.affinity = atoi(optarg);
1261 case 'i': /* interface */
1263 if (!strncmp(optarg, "tap", 3))
1264 g.dev_type = DEV_TAP;
1266 g.dev_type = DEV_NETMAP;
1269 case 't': /* send, deprecated */
1270 D("-t deprecated, please use -f tx -n %s", optarg);
1271 g.td_body = sender_body;
1272 g.npackets = atoi(optarg);
1275 case 'r': /* receive */
1276 D("-r deprecated, please use -f rx -n %s", optarg);
1277 g.td_body = receiver_body;
1278 g.npackets = atoi(optarg);
1281 case 'l': /* pkt_size */
1282 g.pkt_size = atoi(optarg);
1286 g.dst_ip.name = optarg;
1290 g.src_ip.name = optarg;
1293 case 'T': /* report interval */
1294 g.report_interval = atoi(optarg);
1298 wait_link = atoi(optarg);
1302 g.forever = 1; /* do not exit rx even with no traffic */
1305 case 'b': /* burst */
1306 g.burst = atoi(optarg);
1309 g.cpus = atoi(optarg);
1312 g.nthreads = atoi(optarg);
1316 g.dev_type = DEV_PCAP;
1319 case 'D': /* destination mac */
1320 g.dst_mac.name = optarg;
1323 case 'S': /* source mac */
1324 g.src_mac.name = optarg;
1331 if (g.ifname == NULL) {
1332 D("missing ifname");
1337 if (g.cpus < 0 || g.cpus > i) {
1338 D("%d cpus is too high, have only %d cpus", g.cpus, i);
1344 if (g.pkt_size < 16 || g.pkt_size > 1536) {
1345 D("bad pktsize %d\n", g.pkt_size);
1349 if (g.src_mac.name == NULL) {
1350 static char mybuf[20] = "00:00:00:00:00:00";
1351 /* retrieve source mac address. */
1352 if (source_hwaddr(g.ifname, mybuf) == -1) {
1353 D("Unable to retrieve source mac");
1354 // continue, fail later
1356 g.src_mac.name = mybuf;
1358 /* extract address ranges */
1359 extract_ip_range(&g.src_ip);
1360 extract_ip_range(&g.dst_ip);
1361 extract_mac_range(&g.src_mac);
1362 extract_mac_range(&g.dst_mac);
1364 if (g.dev_type == DEV_TAP) {
1365 D("want to use tap %s", g.ifname);
1366 g.main_fd = tap_alloc(g.ifname);
1367 if (g.main_fd < 0) {
1368 D("cannot open tap %s", g.ifname);
1371 } else if (g.dev_type > DEV_NETMAP) {
1372 char pcap_errbuf[PCAP_ERRBUF_SIZE];
1374 D("using pcap on %s", g.ifname);
1375 pcap_errbuf[0] = '\0'; // init the buffer
1376 g.p = pcap_open_live(g.ifname, 0, 1, 100, pcap_errbuf);
1378 D("cannot open pcap on %s", g.ifname);
1382 bzero(&nmr, sizeof(nmr));
1383 nmr.nr_version = NETMAP_API;
1385 * Open the netmap device to fetch the number of queues of our
1388 * The first NIOCREGIF also detaches the card from the
1389 * protocol stack and may cause a reset of the card,
1390 * which in turn may take some time for the PHY to
1393 g.main_fd = open("/dev/netmap", O_RDWR);
1394 if (g.main_fd == -1) {
1395 D("Unable to open /dev/netmap");
1398 if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) {
1399 D("Unable to get if info without name");
1401 D("map size is %d Kb", nmr.nr_memsize >> 10);
1403 bzero(&nmr, sizeof(nmr));
1404 nmr.nr_version = NETMAP_API;
1405 strncpy(nmr.nr_name, g.ifname, sizeof(nmr.nr_name));
1406 if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) {
1407 D("Unable to get if info for %s", g.ifname);
1409 devqueues = nmr.nr_rx_rings;
1412 /* validate provided nthreads. */
1413 if (g.nthreads < 1 || g.nthreads > devqueues) {
1414 D("bad nthreads %d, have %d queues", g.nthreads, devqueues);
1415 // continue, fail later
1419 * Map the netmap shared memory: instead of issuing mmap()
1420 * inside the body of the threads, we prefer to keep this
1421 * operation here to simplify the thread logic.
1423 D("mapping %d Kbytes", nmr.nr_memsize>>10);
1424 g.mmap_size = nmr.nr_memsize;
1425 g.mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize,
1426 PROT_WRITE | PROT_READ,
1427 MAP_SHARED, g.main_fd, 0);
1428 if (g.mmap_addr == MAP_FAILED) {
1429 D("Unable to mmap %d KB", nmr.nr_memsize >> 10);
1430 // continue, fail later
1434 * Register the interface on the netmap device: from now on,
1435 * we can operate on the network interface without any
1436 * interference from the legacy network stack.
1438 * We decide to put the first interface registration here to
1439 * give time to cards that take a long time to reset the PHY.
1441 nmr.nr_version = NETMAP_API;
1442 if (ioctl(g.main_fd, NIOCREGIF, &nmr) == -1) {
1443 D("Unable to register interface %s", g.ifname);
1444 //continue, fail later
1448 /* Print some debug information. */
1450 "%s %s: %d queues, %d threads and %d cpus.\n",
1451 (g.td_body == sender_body) ? "Sending on" : "Receiving from",
1456 if (g.td_body == sender_body) {
1457 fprintf(stdout, "%s -> %s (%s -> %s)\n",
1458 g.src_ip.name, g.dst_ip.name,
1459 g.src_mac.name, g.dst_mac.name);
1462 /* Exit if something went wrong. */
1463 if (g.main_fd < 0) {
1470 D("special options:%s%s%s%s\n",
1471 g.options & OPT_PREFETCH ? " prefetch" : "",
1472 g.options & OPT_ACCESS ? " access" : "",
1473 g.options & OPT_MEMCPY ? " memcpy" : "",
1474 g.options & OPT_COPY ? " copy" : "");
1476 /* Wait for PHY reset. */
1477 D("Wait %d secs for phy reset", wait_link);
1481 /* Install ^C handler. */
1482 global_nthreads = g.nthreads;
1483 signal(SIGINT, sigint_h);
1485 #if 0 // XXX this is not needed, i believe
1486 if (g.dev_type > DEV_NETMAP) {
1487 g.p = pcap_open_live(g.ifname, 0, 1, 100, NULL);
1489 D("cannot open pcap on %s", g.ifname);
1492 D("using pcap %p on %s", g.p, g.ifname);