2 * Copyright (c) 2011-2012 Intel Corporation. All rights reserved.
3 * Copyright (c) 2014-2015 Mellanox Technologies LTD. All rights reserved.
5 * This software is available to you under the OpenIB.org BSD license
8 * Redistribution and use in source and binary forms, with or
9 * without modification, are permitted provided that the following
12 * - Redistributions of source code must retain the above
13 * copyright notice, this list of conditions and the following
16 * - Redistributions in binary form must reproduce the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer in the documentation and/or other materials
19 * provided with the distribution.
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
25 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
26 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
27 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37 #include <sys/types.h>
38 #include <sys/socket.h>
44 #include <netinet/tcp.h>
46 #include <rdma/rdma_cma.h>
47 #include <rdma/rsocket.h>
48 #include <util/compiler.h>
/*
 * Element type of the size table below.  The code in this file reads its
 * `size` and `option` members.
 */
51 struct test_size_param {
/*
 * Transfer sizes stepped through by the latency and bandwidth loops.  Each
 * initializer is a pair: a transfer size (a power of two, or a power of two
 * plus half of it) followed by a 0/1 option value that the loops compare
 * against size_option.
 */
56 static struct test_size_param test_size[] = {
58 { 1 << 7, 1 }, { (1 << 7) + (1 << 6), 1},
59 { 1 << 8, 1 }, { (1 << 8) + (1 << 7), 1},
60 { 1 << 9, 1 }, { (1 << 9) + (1 << 8), 1},
61 { 1 << 10, 1 }, { (1 << 10) + (1 << 9), 1},
62 { 1 << 11, 1 }, { (1 << 11) + (1 << 10), 1},
63 { 1 << 12, 0 }, { (1 << 12) + (1 << 11), 1},
64 { 1 << 13, 1 }, { (1 << 13) + (1 << 12), 1},
65 { 1 << 14, 1 }, { (1 << 14) + (1 << 13), 1},
66 { 1 << 15, 1 }, { (1 << 15) + (1 << 14), 1},
67 { 1 << 16, 0 }, { (1 << 16) + (1 << 15), 1},
68 { 1 << 17, 1 }, { (1 << 17) + (1 << 16), 1},
69 { 1 << 18, 1 }, { (1 << 18) + (1 << 17), 1},
70 { 1 << 19, 1 }, { (1 << 19) + (1 << 18), 1},
71 { 1 << 20, 0 }, { (1 << 20) + (1 << 19), 1},
72 { 1 << 21, 1 }, { (1 << 21) + (1 << 20), 1},
73 { 1 << 22, 1 }, { (1 << 22) + (1 << 21), 1},
/* Number of entries in test_size[]. */
75 #define TEST_CNT (sizeof test_size / sizeof test_size[0])
/* Test configuration and run state shared by the functions in this file. */

/* Flags passed to rs_send()/rs_recv(); non-blocking unless changed by -T. */
81 static int flags = MSG_DONTWAIT;
/* Timeout handed to do_poll(). */
82 static int poll_timeout = 0;
85 static pid_t fork_pid;
/* Selects which RDMA_INLINE setting set_options() applies. */
86 static enum rs_optimization optimization;
/* Threshold compared against test_size[i].option in the size loops. */
87 static int size_option;
/* Defaults for a custom run; the init_*_test() helpers overwrite these. */
88 static int iterations = 1;
89 static int transfer_size = 1000;
90 static int transfer_count = 1000;
91 static int buffer_size, inline_size = 64;
/* Label printed in the first column of each result row. */
92 static char test_name[10] = "custom";
93 static const char *port = "7471";
/* dst_addr is non-NULL on the connecting (client) side. */
95 static char *dst_addr;
96 static char *src_addr;
/* Timestamps taken by run_test() and consumed by show_perf(). */
97 static struct timeval start, end;
/* Hints passed to rdma_getaddrinfo() and getaddrinfo() respectively. */
99 static struct rdma_addrinfo rai_hints;
100 static struct addrinfo ai_hints;
/*
 * Print one result row for the test that just ran, using the global test
 * parameters and the start/end timestamps.  The final printf emits elapsed
 * seconds, throughput in Gb/sec and microseconds per transfer.
 */
102 static void show_perf(void)
/* Elapsed time in microseconds. */
108 usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
/* Factor of 2: run_test() moves transfer_count transfers in each direction. */
109 bytes = (long long) iterations * transfer_count * transfer_size * 2;
111 /* name size transfers iterations bytes seconds Gb/sec usec/xfer */
112 printf("%-10s", test_name);
113 size_str(str, sizeof str, transfer_size);
115 cnt_str(str, sizeof str, transfer_count);
117 cnt_str(str, sizeof str, iterations);
119 size_str(str, sizeof str, bytes);
/* bits per microsecond divided by 1000 gives Gb/sec. */
121 printf("%8.2fs%10.2f%11.2f\n",
122 usec / 1000000., (bytes * 8) / (1000. * usec),
123 (usec / iterations) / (transfer_count * 2));
/*
 * Set up the globals for a latency run at transfer size `size`: the test is
 * named "<size>_lat", transfer_size is set to `size`, and the iteration count
 * is derived from size_to_count().
 */
126 static void init_latency_test(int size)
130 size_str(sstr, sizeof sstr, size);
131 snprintf(test_name, sizeof test_name, "%s_lat", sstr);
133 transfer_size = size;
134 iterations = size_to_count(transfer_size);
/*
 * Set up the globals for a bandwidth run at transfer size `size`: the test is
 * named "<size>_bw", transfer_size is set to `size`, and the transfer count
 * is derived from size_to_count().
 */
137 static void init_bandwidth_test(int size)
141 size_str(sstr, sizeof sstr, size);
142 snprintf(test_name, sizeof test_name, "%s_bw", sstr);
144 transfer_size = size;
145 transfer_count = size_to_count(transfer_size);
/*
 * Send `size` bytes of buf over rs.  The loop keeps calling rs_send() with the
 * remaining length until offset reaches `size`, polling for POLLOUT via
 * do_poll() along the way.
 * NOTE(review): the conditions guarding format_buf() and do_poll(), and the
 * return paths, are on lines not visible in this excerpt.
 */
148 static int send_xfer(int size)
154 format_buf(buf, size);
158 fds.events = POLLOUT;
161 for (offset = 0; offset < size; ) {
163 ret = do_poll(&fds, poll_timeout);
/* Partial sends are resumed from buf + offset. */
168 ret = rs_send(rs, buf + offset, size - offset, flags);
/* EWOULDBLOCK/EAGAIN are not treated as errors; anything else enters this branch. */
171 } else if (errno != EWOULDBLOCK && errno != EAGAIN) {
/*
 * Receive `size` bytes from rs into buf.  The loop keeps calling rs_recv()
 * with the remaining length until offset reaches `size`, then the received
 * data is checked with verify_buf().
 * NOTE(review): the conditions guarding do_poll() and verify_buf(), and the
 * return paths, are on lines not visible in this excerpt.
 */
180 static int recv_xfer(int size)
190 for (offset = 0; offset < size; ) {
192 ret = do_poll(&fds, poll_timeout);
/* Partial receives are resumed at buf + offset. */
197 ret = rs_recv(rs, buf + offset, size - offset, flags);
/* EWOULDBLOCK/EAGAIN are not treated as errors; anything else enters this branch. */
200 } else if (errno != EWOULDBLOCK && errno != EAGAIN) {
207 ret = verify_buf(buf, size);
/*
 * Exchange a 16-byte message in each direction.  The side with dst_addr set
 * sends first and then receives; the other side receives first and then
 * sends.  Returns the result of the second transfer.
 */
215 static int sync_test(void)
219 ret = dst_addr ? send_xfer(16) : recv_xfer(16);
223 return dst_addr ? recv_xfer(16) : send_xfer(16);
/*
 * Run the timed portion of a test.  For each of `iterations` rounds,
 * transfer_count transfers of transfer_size bytes go in one direction and
 * then transfer_count go in the other.  The side with dst_addr set sends
 * first.  start and end bracket the whole loop for show_perf().
 */
226 static int run_test(void)
234 gettimeofday(&start, NULL);
235 for (i = 0; i < iterations; i++) {
/* First direction: dst_addr side sends, the peer receives. */
236 for (t = 0; t < transfer_count; t++) {
237 ret = dst_addr ? send_xfer(transfer_size) :
238 recv_xfer(transfer_size);
/* Reverse direction. */
243 for (t = 0; t < transfer_count; t++) {
244 ret = dst_addr ? recv_xfer(transfer_size) :
245 send_xfer(transfer_size);
250 gettimeofday(&end, NULL);
/*
 * Enable keepalive on fd and report the resulting settings.
 *
 * SO_KEEPALIVE is set first; a failure there is reported with perror().
 * TCP_KEEPIDLE is then set (failure is also reported with perror()), and both
 * options are read back and printed when the reads succeed.
 */
258 static void set_keepalive(int fd)
/* The option length must describe the value buffer (optval), not optlen itself. */
261 socklen_t optlen = sizeof(optval);
264 if (rs_setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &optval, optlen)) {
265 perror("rsetsockopt SO_KEEPALIVE");
270 if (rs_setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &optval, optlen))
271 perror("rsetsockopt TCP_KEEPIDLE");
/* Read the options back so the values actually in effect are shown. */
273 if (!(rs_getsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &optval, &optlen)))
274 printf("Keepalive: %s\n", (optval ? "ON" : "OFF"));
276 if (!(rs_getsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &optval, &optlen)))
277 printf(" time: %i\n", optval);
/*
 * Apply the test's socket options to fd: send/receive buffer sizes,
 * TCP_NODELAY, non-blocking mode when MSG_DONTWAIT is set in flags, and an
 * RDMA inline size chosen according to `optimization`.
 * NOTE(review): the condition choosing between buffer_size and the local val
 * for SO_SNDBUF/SO_RCVBUF is on a line not visible in this excerpt.
 */
280 static void set_options(int fd)
285 rs_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *) &buffer_size,
287 rs_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *) &buffer_size,
291 rs_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, (void *) &val, sizeof val);
292 rs_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (void *) &val, sizeof val);
296 rs_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (void *) &val, sizeof(val));
/* Keep the descriptor's blocking mode in step with the send/recv flags. */
298 if (flags & MSG_DONTWAIT)
299 rs_fcntl(fd, F_SETFL, O_NONBLOCK);
302 /* Inline size based on experimental data */
/* Latency runs use the configurable inline_size; bandwidth runs use val. */
303 if (optimization == opt_latency) {
304 rs_setsockopt(fd, SOL_RDMA, RDMA_INLINE, &inline_size,
306 } else if (optimization == opt_bandwidth) {
308 rs_setsockopt(fd, SOL_RDMA, RDMA_INLINE, &val, sizeof val);
/*
 * Create the listening socket lrs: resolve src_addr/port with the passive
 * flag added to the hints, open a SOCK_STREAM rsocket for the resolved
 * family, set SO_REUSEADDR, bind, and listen with a backlog of 1.
 * Resolution goes through rdma_getaddrinfo() or getaddrinfo(); when rai is
 * non-NULL the rdma_addrinfo result is the one used for socket and bind.
 * NOTE(review): the condition selecting the resolver and the error/cleanup
 * paths are on lines not visible in this excerpt.
 */
316 static int server_listen(void)
318 struct rdma_addrinfo *rai = NULL;
323 rai_hints.ai_flags |= RAI_PASSIVE;
324 ret = rdma_getaddrinfo(src_addr, port, &rai_hints, &rai);
326 ai_hints.ai_flags |= AI_PASSIVE;
327 ret = getaddrinfo(src_addr, port, &ai_hints, &ai);
330 printf("getaddrinfo: %s\n", gai_strerror(ret));
334 lrs = rai ? rs_socket(rai->ai_family, SOCK_STREAM, 0) :
335 rs_socket(ai->ai_family, SOCK_STREAM, 0);
343 ret = rs_setsockopt(lrs, SOL_SOCKET, SO_REUSEADDR, &val, sizeof val);
345 perror("rsetsockopt SO_REUSEADDR");
349 ret = rai ? rs_bind(lrs, rai->ai_src_addr, rai->ai_src_len) :
350 rs_bind(lrs, ai->ai_addr, ai->ai_addrlen);
356 ret = rs_listen(lrs, 1);
365 rdma_freeaddrinfo(rai);
/*
 * Accept one connection on the listening socket lrs and store it in rs.
 * The accept is retried for as long as it fails with EAGAIN or EWOULDBLOCK,
 * with do_poll() used between attempts.
 * NOTE(review): the condition guarding do_poll() and the handling after the
 * loop are on lines not visible in this excerpt.
 */
371 static int server_connect(void)
382 ret = do_poll(&fds, poll_timeout);
389 rs = rs_accept(lrs, NULL, NULL);
390 } while (rs < 0 && (errno == EAGAIN || errno == EWOULDBLOCK));
/*
 * Client side of connection setup: resolve dst_addr/port, create the rsocket
 * rs, bind it to a resolved source address, and connect.  Destination
 * resolution uses rdma_getaddrinfo() when use_rgai is set and getaddrinfo()
 * otherwise.  A connect that reports EINPROGRESS is completed by polling for
 * POLLOUT and then reading SO_ERROR.
 * NOTE(review): the conditions guarding source-address resolution and the
 * bind, plus the error/cleanup paths, are on lines not visible in this
 * excerpt.
 */
403 static int client_connect(void)
405 struct rdma_addrinfo *rai = NULL, *rai_src = NULL;
406 struct addrinfo *ai, *ai_src;
411 ret = use_rgai ? rdma_getaddrinfo(dst_addr, port, &rai_hints, &rai) :
412 getaddrinfo(dst_addr, port, &ai_hints, &ai);
415 printf("getaddrinfo: %s\n", gai_strerror(ret));
/* Source address lookup adds the passive flag to the shared hints. */
421 rai_hints.ai_flags |= RAI_PASSIVE;
422 ret = rdma_getaddrinfo(src_addr, port, &rai_hints, &rai_src);
424 ai_hints.ai_flags |= AI_PASSIVE;
425 ret = getaddrinfo(src_addr, port, &ai_hints, &ai_src);
428 printf("getaddrinfo src_addr: %s\n", gai_strerror(ret));
433 rs = rai ? rs_socket(rai->ai_family, SOCK_STREAM, 0) :
434 rs_socket(ai->ai_family, SOCK_STREAM, 0);
444 ret = rai ? rs_bind(rs, rai_src->ai_src_addr, rai_src->ai_src_len) :
445 rs_bind(rs, ai_src->ai_addr, ai_src->ai_addrlen);
/* When the rdma_addrinfo result carries routing data, pass it to the socket before connecting. */
452 if (rai && rai->ai_route) {
453 ret = rs_setsockopt(rs, SOL_RDMA, RDMA_ROUTE, rai->ai_route,
456 perror("rsetsockopt RDMA_ROUTE");
461 ret = rai ? rs_connect(rs, rai->ai_dst_addr, rai->ai_dst_len) :
462 rs_connect(rs, ai->ai_addr, ai->ai_addrlen);
/* Any failure other than EINPROGRESS is a hard connect error. */
463 if (ret && (errno != EINPROGRESS)) {
/* Connect still in progress: wait for writability, then fetch the outcome. */
468 if (ret && (errno == EINPROGRESS)) {
470 fds.events = POLLOUT;
471 ret = do_poll(&fds, poll_timeout);
478 ret = rs_getsockopt(rs, SOL_SOCKET, SO_ERROR, &err, &len);
484 perror("async rconnect");
493 rdma_freeaddrinfo(rai);
503 buf = malloc(!custom ? test_size[TEST_CNT - 1].size : transfer_size);
510 ret = server_listen();
515 printf("%-10s%-8s%-8s%-8s%-8s%8s %10s%13s\n",
516 "name", "bytes", "xfers", "iters", "total", "time", "Gb/sec", "usec/xfer");
518 optimization = opt_latency;
519 ret = dst_addr ? client_connect() : server_connect();
523 for (i = 0; i < TEST_CNT && !fork_pid; i++) {
524 if (test_size[i].option > size_option)
526 init_latency_test(test_size[i].size);
530 waitpid(fork_pid, NULL, 0);
532 rs_shutdown(rs, SHUT_RDWR);
535 if (!dst_addr && use_fork && !fork_pid)
538 optimization = opt_bandwidth;
539 ret = dst_addr ? client_connect() : server_connect();
542 for (i = 0; i < TEST_CNT && !fork_pid; i++) {
543 if (test_size[i].option > size_option)
545 init_bandwidth_test(test_size[i].size);
549 ret = dst_addr ? client_connect() : server_connect();
558 waitpid(fork_pid, NULL, 0);
560 rs_shutdown(rs, SHUT_RDWR);
/*
 * Apply one -T test option given in `arg`.  A one-character argument is
 * handled separately from the word forms.  The word forms are matched
 * case-insensitively on a fixed-length prefix, so "sockets" matches "socket".
 * The blocking option clears MSG_DONTWAIT and sets MSG_WAITALL in flags; the
 * non-blocking option sets MSG_DONTWAIT.
 * NOTE(review): the effects of the other options and the return values are
 * on lines not visible in this excerpt.
 */
567 static int set_test_opt(const char *arg)
/* Single-letter form. */
569 if (strlen(arg) == 1) {
578 flags = (flags & ~MSG_DONTWAIT) | MSG_WAITALL;
585 flags |= MSG_DONTWAIT;
/* Word form, compared by prefix. */
597 if (!strncasecmp("socket", arg, 6)) {
599 } else if (!strncasecmp("async", arg, 5)) {
601 } else if (!strncasecmp("block", arg, 5)) {
602 flags = (flags & ~MSG_DONTWAIT) | MSG_WAITALL;
603 } else if (!strncasecmp("nonblock", arg, 8)) {
604 flags |= MSG_DONTWAIT;
605 } else if (!strncasecmp("resolve", arg, 7)) {
607 } else if (!strncasecmp("verify", arg, 6)) {
609 } else if (!strncasecmp("fork", arg, 4)) {
619 int main(int argc, char **argv)
623 ai_hints.ai_socktype = SOCK_STREAM;
624 rai_hints.ai_port_space = RDMA_PS_TCP;
625 while ((op = getopt(argc, argv, "s:b:f:B:i:I:C:S:p:k:T:")) != -1) {
634 if (!strncasecmp("ip", optarg, 2)) {
635 ai_hints.ai_flags = AI_NUMERICHOST;
636 } else if (!strncasecmp("gid", optarg, 3)) {
637 rai_hints.ai_flags = RAI_NUMERICHOST | RAI_FAMILY;
638 rai_hints.ai_family = AF_IB;
641 fprintf(stderr, "Warning: unknown address format\n");
645 buffer_size = atoi(optarg);
648 inline_size = atoi(optarg);
652 iterations = atoi(optarg);
656 transfer_count = atoi(optarg);
659 if (!strncasecmp("all", optarg, 3)) {
663 transfer_size = atoi(optarg);
670 keepalive = atoi(optarg);
673 if (!set_test_opt(optarg))
675 /* invalid option - fall through */
678 printf("usage: %s\n", argv[0]);
679 printf("\t[-s server_address]\n");
680 printf("\t[-b bind_address]\n");
681 printf("\t[-f address_format]\n");
682 printf("\t name, ip, ipv6, or gid\n");
683 printf("\t[-B buffer_size]\n");
684 printf("\t[-i inline_size]\n");
685 printf("\t[-I iterations]\n");
686 printf("\t[-C transfer_count]\n");
687 printf("\t[-S transfer_size or all]\n");
688 printf("\t[-p port_number]\n");
689 printf("\t[-k keepalive_time]\n");
690 printf("\t[-T test_option]\n");
691 printf("\t s|sockets - use standard tcp/ip sockets\n");
692 printf("\t a|async - asynchronous operation (use poll)\n");
693 printf("\t b|blocking - use blocking calls\n");
694 printf("\t f|fork - fork server processing\n");
695 printf("\t n|nonblocking - use nonblocking calls\n");
696 printf("\t r|resolve - use rdma cm to resolve address\n");
697 printf("\t v|verify - verify data\n");
702 if (!(flags & MSG_DONTWAIT))