2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 #endif /* HAVE_CONFIG_H */
41 #include <sys/types.h>
42 #include <sys/socket.h>
46 #include <arpa/inet.h>
/*
 * Work-request IDs stamped on posted receives and sends.  The two values are
 * distinct bits: main() ORs them into ctx->pending[] and clears each bit as
 * the matching completion arrives.
 */
52 PINGPONG_RECV_WRID = 1,
53 PINGPONG_SEND_WRID = 2,
/*
 * State for one ping-pong run: the verbs objects and bookkeeping shared by
 * all queue pairs.  Only some members are visible in this listing; the rest
 * of the file also accesses pd, mr, cq, srq, buf, num_qp, rx_depth and
 * pending[] through this structure.
 */
60 struct pingpong_context {
/* Device context obtained from ibv_open_device() in pp_init_ctx(). */
61 struct ibv_context *context;
/* Completion channel obtained from ibv_create_comp_channel(). */
62 struct ibv_comp_channel *channel;
/* Queue pairs; pp_init_ctx() creates entries 0 .. num_qp-1. */
67 struct ibv_qp *qp[MAX_QP];
/* Port attributes filled in by pp_get_port_info() in main(). */
73 struct ibv_port_attr portinfo;
/*
 * Addressing information for one queue-pair endpoint.  The members are not
 * visible in this listing; the rest of the file reads and writes lid, qpn,
 * psn and gid, and exchanges them with the peer as a text record over TCP.
 */
76 struct pingpong_dest {
/*
 * Bring every queue pair in ctx (indices 0 .. ctx->num_qp-1) to the RTR and
 * then the RTS state so it can exchange messages with its remote peer.
 *
 * my_dest[i] supplies the local send PSN for QP i; dest[i] supplies the
 * remote QP number and receive PSN.  sgid_idx is used as the source GID
 * index when a global route is configured.
 *
 * A failed transition is reported on stderr with the QP index.  Callers test
 * the result for non-zero to detect failure.
 * NOTE(review): the return statements, and the uses of port, mtu and sl, are
 * on lines not visible in this listing -- confirm against the full file.
 */
83 static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu,
84 int sl, const struct pingpong_dest *my_dest,
85 const struct pingpong_dest *dest, int sgid_idx)
89 for (i = 0; i < ctx->num_qp; ++i) {
/* Attributes for the transition to Ready-To-Receive. */
90 struct ibv_qp_attr attr = {
91 .qp_state = IBV_QPS_RTR,
93 .dest_qp_num = dest[i].qpn,
94 .rq_psn = dest[i].psn,
95 .max_dest_rd_atomic = 1,
/*
 * A non-zero interface_id in the remote GID selects a global route (GRH).
 * NOTE(review): this tests and copies dest->gid, i.e. entry 0, for every QP
 * rather than dest[i].gid -- presumably fine because all remote QPs share one
 * port; confirm.
 */
106 if (dest->gid.global.interface_id) {
107 attr.ah_attr.is_global = 1;
108 attr.ah_attr.grh.hop_limit = 1;
109 attr.ah_attr.grh.dgid = dest->gid;
110 attr.ah_attr.grh.sgid_index = sgid_idx;
112 if (ibv_modify_qp(ctx->qp[i], &attr,
118 IBV_QP_MAX_DEST_RD_ATOMIC |
119 IBV_QP_MIN_RNR_TIMER)) {
120 fprintf(stderr, "Failed to modify QP[%d] to RTR\n", i);
/* Reuse the same attr structure for the transition to Ready-To-Send. */
124 attr.qp_state = IBV_QPS_RTS;
128 attr.sq_psn = my_dest[i].psn;
129 attr.max_rd_atomic = 1;
130 if (ibv_modify_qp(ctx->qp[i], &attr,
136 IBV_QP_MAX_QP_RD_ATOMIC)) {
137 fprintf(stderr, "Failed to modify QP[%d] to RTS\n", i);
/*
 * Client side of the out-of-band address exchange.
 *
 * Resolves servername:port (IPv4, TCP), connects to the first address that
 * accepts, sends MAX_QP local address records taken from my_dest[], then
 * reads MAX_QP remote records into a freshly malloc()ed array and finally
 * writes "done" to the server.
 *
 * Each record is a fixed-size text message "LID:QPN:PSN:GID" in hex.
 * NOTE(review): the return statements and cleanup (free of service, freeing
 * res, closing sockfd) are on lines not visible in this listing; main()
 * assigns the result to rem_dest, so presumably the malloc()ed array is
 * returned and owned by the caller -- confirm.
 */
145 static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
146 const struct pingpong_dest *my_dest)
148 struct addrinfo *res, *t;
149 struct addrinfo hints = {
150 .ai_family = AF_INET,
151 .ai_socktype = SOCK_STREAM
/*
 * Wire record buffer: 4 hex digits of LID, 6 of QPN, 6 of PSN and 32 of GID,
 * separated by ':'; sizeof of the literal also counts the terminating NUL.
 */
154 char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
159 struct pingpong_dest *rem_dest = NULL;
/* getaddrinfo() takes the service as a string, so format the port number. */
162 if (asprintf(&service, "%d", port) < 0)
165 n = getaddrinfo(servername, service, &hints, &res);
168 fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
/* Try each resolved address in turn. */
173 for (t = res; t; t = t->ai_next) {
174 sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
176 if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
187 fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
/* Send all MAX_QP records, not just the QPs in use. */
191 for (i = 0; i < MAX_QP; ++i) {
192 gid_to_wire_gid(&my_dest[i].gid, gid);
/*
 * NOTE(review): the widths in the format are minimums, so a field wider than
 * the template would overrun msg -- presumably the field types rule this out;
 * confirm, or switch to snprintf.
 */
193 sprintf(msg, "%04x:%06x:%06x:%s", my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn, gid);
194 if (write(sockfd, msg, sizeof msg) != sizeof msg) {
195 fprintf(stderr, "Couldn't send local address\n");
200 rem_dest = malloc(MAX_QP * sizeof *rem_dest);
/* Read MAX_QP records back; read() may return short, so loop per record. */
204 for (i = 0; i < MAX_QP; ++i) {
206 while (n < sizeof msg) {
207 r = read(sockfd, msg + n, sizeof msg - n);
209 perror("client read");
210 fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
211 n, (int) sizeof msg, i);
/*
 * NOTE(review): the sscanf result is not checked and "%s" has no field width,
 * so a malformed record from the peer could overrun gid (its declaration is
 * not visible in this listing) -- confirm and consider a bounded width.
 */
217 sscanf(msg, "%x:%x:%x:%s",
218 &rem_dest[i].lid, &rem_dest[i].qpn, &rem_dest[i].psn, gid);
219 wire_gid_to_gid(gid, &rem_dest[i].gid);
/*
 * Tell the server the exchange is complete; sizeof "done" sends the NUL too.
 * NOTE(review): the write() result is ignored here.
 */
222 write(sockfd, "done", sizeof "done");
/*
 * Server side of the out-of-band address exchange.
 *
 * Binds a passive IPv4 TCP socket on the given port, accepts one connection,
 * reads MAX_QP remote address records into a freshly malloc()ed array,
 * connects the local QPs to them with pp_connect_ctx(), sends MAX_QP local
 * records from my_dest[] back, and then reads the client's final message.
 *
 * The record format matches the one used by pp_client_exch_dest().
 * NOTE(review): part of the parameter list, the listen() call, the return
 * statements and the socket cleanup are on lines not visible in this listing;
 * main() assigns the result to rem_dest, so presumably the malloc()ed array
 * is returned -- confirm.
 */
229 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
230 int ib_port, enum ibv_mtu mtu,
232 const struct pingpong_dest *my_dest,
235 struct addrinfo *res, *t;
236 struct addrinfo hints = {
237 .ai_flags = AI_PASSIVE,
238 .ai_family = AF_INET,
239 .ai_socktype = SOCK_STREAM
/* Fixed-size wire record "LID:QPN:PSN:GID" in hex, including the NUL. */
242 char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
246 int sockfd = -1, connfd;
247 struct pingpong_dest *rem_dest = NULL;
/* getaddrinfo() takes the service as a string, so format the port number. */
250 if (asprintf(&service, "%d", port) < 0)
/* NULL node together with AI_PASSIVE yields addresses suitable for bind(). */
253 n = getaddrinfo(NULL, service, &hints, &res);
256 fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
/* Try each candidate address until one binds. */
261 for (t = res; t; t = t->ai_next) {
262 sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
/*
 * n is reused as the SO_REUSEADDR option value; its assignment is on a line
 * not visible in this listing.  NOTE(review): the setsockopt() result is
 * ignored.
 */
266 setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
268 if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
279 fprintf(stderr, "Couldn't listen to port %d\n", port);
/* Only one client connection is accepted; the peer address is not wanted. */
284 connfd = accept(sockfd, NULL, 0);
287 fprintf(stderr, "accept() failed\n");
291 rem_dest = malloc(MAX_QP * sizeof *rem_dest);
/* Read MAX_QP records; read() may return short, so loop per record. */
295 for (i = 0; i < MAX_QP; ++i) {
297 while (n < sizeof msg) {
298 r = read(connfd, msg + n, sizeof msg - n);
300 perror("server read");
301 fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
302 n, (int) sizeof msg, i);
/*
 * NOTE(review): the sscanf result is not checked and "%s" has no field width,
 * so a malformed record from the peer could overrun gid (its declaration is
 * not visible in this listing) -- confirm and consider a bounded width.
 */
308 sscanf(msg, "%x:%x:%x:%s",
309 &rem_dest[i].lid, &rem_dest[i].qpn, &rem_dest[i].psn, gid);
310 wire_gid_to_gid(gid, &rem_dest[i].gid);
/* The server connects its QPs before replying with its own addresses. */
313 if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest, sgid_idx)) {
314 fprintf(stderr, "Couldn't connect to remote QP\n");
/* Send all MAX_QP local records, not just the QPs in use. */
320 for (i = 0; i < MAX_QP; ++i) {
321 gid_to_wire_gid(&my_dest[i].gid, gid);
322 sprintf(msg, "%04x:%06x:%06x:%s", my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn, gid);
323 if (write(connfd, msg, sizeof msg) != sizeof msg) {
324 fprintf(stderr, "Couldn't send local address\n");
/*
 * Wait for the client's final message (it writes "done").
 * NOTE(review): the read() result and the message contents are not checked.
 */
331 read(connfd, msg, sizeof msg);
/*
 * Allocate and initialise a pingpong_context on device ib_dev.
 *
 * Creates, in order: the zeroed context structure, the message buffer, the
 * device context, a completion channel, a protection domain, a memory region
 * over the buffer, a completion queue, a shared receive queue and num_qp
 * reliable-connected QPs, then moves each QP to the INIT state.  Each failure
 * is reported on stderr.
 *
 * NOTE(review): the rest of the parameter list, the failure checks, the
 * return statements and any error-path cleanup are on lines not visible in
 * this listing; main() stores the result in ctx -- confirm what is returned
 * on failure and whether partially created objects are released.
 */
338 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
339 int num_qp, int rx_depth, int port,
342 struct pingpong_context *ctx;
/* calloc() leaves every member zero/NULL until it is assigned below. */
345 ctx = calloc(1, sizeof *ctx);
350 ctx->num_qp = num_qp;
351 ctx->rx_depth = rx_depth;
/* The allocation is rounded up to page_size; only size bytes are cleared. */
353 ctx->buf = malloc(roundup(size, page_size));
355 fprintf(stderr, "Couldn't allocate work buf.\n");
359 memset(ctx->buf, 0, size);
361 ctx->context = ibv_open_device(ib_dev);
363 fprintf(stderr, "Couldn't get context for %s\n",
364 ibv_get_device_name(ib_dev));
369 ctx->channel = ibv_create_comp_channel(ctx->context);
371 fprintf(stderr, "Couldn't create completion channel\n");
377 ctx->pd = ibv_alloc_pd(ctx->context);
379 fprintf(stderr, "Couldn't allocate PD\n");
/* Register the buffer with local write access so receives can land in it. */
383 ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE);
385 fprintf(stderr, "Couldn't register MR\n");
/* CQ capacity: rx_depth + num_qp entries, the same figure main() uses for its wc array. */
389 ctx->cq = ibv_create_cq(ctx->context, rx_depth + num_qp, NULL,
392 fprintf(stderr, "Couldn't create CQ\n");
/* One shared receive queue serves all QPs. */
397 struct ibv_srq_init_attr attr = {
404 ctx->srq = ibv_create_srq(ctx->pd, &attr);
406 fprintf(stderr, "Couldn't create SRQ\n");
/* Create the reliable-connected QPs. */
411 for (i = 0; i < num_qp; ++i) {
412 struct ibv_qp_init_attr attr = {
420 .qp_type = IBV_QPT_RC
423 ctx->qp[i] = ibv_create_qp(ctx->pd, &attr);
425 fprintf(stderr, "Couldn't create QP[%d]\n", i);
/* Move every QP to INIT; pp_connect_ctx() later takes them to RTR and RTS. */
430 for (i = 0; i < num_qp; ++i) {
431 struct ibv_qp_attr attr = {
432 .qp_state = IBV_QPS_INIT,
438 if (ibv_modify_qp(ctx->qp[i], &attr,
442 IBV_QP_ACCESS_FLAGS)) {
443 fprintf(stderr, "Failed to modify QP[%d] to INIT\n", i);
/*
 * Release the verbs objects held by ctx: the first num_qp QPs, then the SRQ,
 * CQ, memory region, protection domain, completion channel and finally the
 * device context.  Each failed release is reported on stderr.
 *
 * main() treats a non-zero result as failure.
 * NOTE(review): the return statements, and any free() of ctx->buf or ctx
 * itself, are on lines not visible in this listing -- confirm.
 */
451 int pp_close_ctx(struct pingpong_context *ctx, int num_qp)
455 for (i = 0; i < num_qp; ++i) {
456 if (ibv_destroy_qp(ctx->qp[i])) {
457 fprintf(stderr, "Couldn't destroy QP[%d]\n", i);
462 if (ibv_destroy_srq(ctx->srq)) {
463 fprintf(stderr, "Couldn't destroy SRQ\n");
467 if (ibv_destroy_cq(ctx->cq)) {
468 fprintf(stderr, "Couldn't destroy CQ\n");
472 if (ibv_dereg_mr(ctx->mr)) {
473 fprintf(stderr, "Couldn't deregister MR\n");
477 if (ibv_dealloc_pd(ctx->pd)) {
478 fprintf(stderr, "Couldn't deallocate PD\n");
483 if (ibv_destroy_comp_channel(ctx->channel)) {
484 fprintf(stderr, "Couldn't destroy completion channel\n");
489 if (ibv_close_device(ctx->context)) {
490 fprintf(stderr, "Couldn't release context\n");
/*
 * Post up to n receive work requests to the shared receive queue.  Every
 * request uses the same scatter entry, which starts at ctx->buf, and carries
 * PINGPONG_RECV_WRID as its wr_id.
 *
 * NOTE(review): the statement executed when ibv_post_srq_recv() fails and the
 * return statement are not visible in this listing; main() adds the result
 * to its count of outstanding receives, so presumably the number actually
 * posted is returned -- confirm.
 */
500 static int pp_post_recv(struct pingpong_context *ctx, int n)
502 struct ibv_sge list = {
503 .addr = (uintptr_t) ctx->buf,
505 .lkey = ctx->mr->lkey
507 struct ibv_recv_wr wr = {
508 .wr_id = PINGPONG_RECV_WRID,
512 struct ibv_recv_wr *bad_wr;
515 for (i = 0; i < n; ++i)
516 if (ibv_post_srq_recv(ctx->srq, &wr, &bad_wr))
/*
 * Post one signaled SEND work request on ctx->qp[qp_index].  The scatter
 * entry starts at ctx->buf and the request carries PINGPONG_SEND_WRID as its
 * wr_id.
 *
 * Returns the result of ibv_post_send() unchanged; callers in this file treat
 * non-zero as failure.  qp_index is not range-checked here.
 */
522 static int pp_post_send(struct pingpong_context *ctx, int qp_index)
524 struct ibv_sge list = {
525 .addr = (uintptr_t) ctx->buf,
527 .lkey = ctx->mr->lkey
529 struct ibv_send_wr wr = {
530 .wr_id = PINGPONG_SEND_WRID,
533 .opcode = IBV_WR_SEND,
/* Signaled so that a completion is generated for the send. */
534 .send_flags = IBV_SEND_SIGNALED,
536 struct ibv_send_wr *bad_wr;
538 return ibv_post_send(ctx->qp[qp_index], &wr, &bad_wr);
/*
 * Linear search of ctx->qp[0 .. num_qp-1] for the QP whose qp_num equals qpn.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * main() uses the result as an index into ctx->pending[], so presumably the
 * matching index is returned, with some sentinel when nothing matches --
 * confirm.  qpn is a signed int compared against the qp_num field, so the
 * comparison may mix signedness.
 */
541 static int find_qp(int qpn, struct pingpong_context *ctx, int num_qp)
545 for (i = 0; i < num_qp; ++i)
546 if (ctx->qp[i]->qp_num == qpn)
/*
 * Print the command-line help to stdout.  argv0 is inserted as the program
 * name in the two invocation forms (server with no host, client with <host>).
 * The option list mirrors the long_options table in main().
 */
552 static void usage(const char *argv0)
555 printf(" %s start a server and wait for connection\n", argv0);
556 printf(" %s <host> connect to server at <host>\n", argv0);
558 printf("Options:\n");
559 printf(" -p, --port=<port> listen on/connect to port <port> (default 18515)\n");
560 printf(" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n");
561 printf(" -i, --ib-port=<port> use port <port> of IB device (default 1)\n");
562 printf(" -s, --size=<size> size of message to exchange (default 4096)\n");
563 printf(" -m, --mtu=<size> path MTU (default 1024)\n");
564 printf(" -q, --num-qp=<num> number of QPs to use (default 16)\n");
565 printf(" -r, --rx-depth=<dep> number of receives to post at a time (default 500)\n");
566 printf(" -n, --iters=<iters> number of exchanges per QP(default 1000)\n");
567 printf(" -l, --sl=<sl> service level value\n");
568 printf(" -e, --events sleep on CQ events (default poll)\n");
569 printf(" -g, --gid-idx=<gid index> local port gid index\n");
/*
 * Program entry point: parse options, open the IB device and build the
 * context, exchange QP addresses with the peer over TCP, run the ping-pong
 * loop until the send and receive counters both reach iters, then print
 * throughput and latency figures and tear everything down.
 *
 * Exactly one positional argument makes this the client for that host.
 * NOTE(review): many declarations, conditions and return statements of this
 * function are on lines not visible in this listing; comments below describe
 * only what the visible lines establish.
 */
572 int main(int argc, char *argv[])
574 struct ibv_device **dev_list;
575 struct ibv_device *ib_dev;
577 struct pingpong_context *ctx;
/* Sized for MAX_QP because the TCP exchange always transfers MAX_QP records. */
578 struct pingpong_dest my_dest[MAX_QP];
579 struct pingpong_dest *rem_dest;
580 struct timeval start, end;
581 char *ib_devname = NULL;
582 char *servername = NULL;
586 enum ibv_mtu mtu = IBV_MTU_1024;
595 int num_cq_events = 0;
/* Seed the generator that lrand48() uses for the initial PSNs below. */
600 srand48(getpid() * time(NULL));
/* Long options; each maps to the short option letter handled in the switch. */
605 static struct option long_options[] = {
606 { .name = "port", .has_arg = 1, .val = 'p' },
607 { .name = "ib-dev", .has_arg = 1, .val = 'd' },
608 { .name = "ib-port", .has_arg = 1, .val = 'i' },
609 { .name = "size", .has_arg = 1, .val = 's' },
610 { .name = "mtu", .has_arg = 1, .val = 'm' },
611 { .name = "num-qp", .has_arg = 1, .val = 'q' },
612 { .name = "rx-depth", .has_arg = 1, .val = 'r' },
613 { .name = "iters", .has_arg = 1, .val = 'n' },
614 { .name = "sl", .has_arg = 1, .val = 'l' },
615 { .name = "events", .has_arg = 0, .val = 'e' },
616 { .name = "gid-idx", .has_arg = 1, .val = 'g' },
620 c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:", long_options, NULL);
/*
 * NOTE(review): the strtol() calls below pass a NULL end pointer, so trailing
 * garbage and non-numeric input are not detected at these call sites; any
 * range checks other than the one on port are not visible in this listing.
 */
626 port = strtol(optarg, NULL, 0);
627 if (port < 0 || port > 65535) {
634 ib_devname = strdup(optarg);
638 ib_port = strtol(optarg, NULL, 0);
646 size = strtol(optarg, NULL, 0);
650 mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
658 num_qp = strtol(optarg, NULL, 0);
662 rx_depth = strtol(optarg, NULL, 0);
666 iters = strtol(optarg, NULL, 0);
670 sl = strtol(optarg, NULL, 0);
678 gidx = strtol(optarg, NULL, 0);
/* Exactly one positional argument names the server to connect to. */
687 if (optind == argc - 1)
688 servername = strdup(argv[optind]);
689 else if (optind < argc) {
/* At least one receive per QP must fit in the receive depth. */
694 if (num_qp > rx_depth) {
695 fprintf(stderr, "rx_depth %d is too small for %d QPs -- "
696 "must have at least one receive per QP.\n",
/*
 * Completion array sized like the CQ (num_qp + rx_depth entries).
 * NOTE(review): alloca() takes this from the stack with a size derived from
 * command-line values; confirm the inputs are bounded.
 */
701 num_wc = num_qp + rx_depth;
702 wc = alloca(num_wc * sizeof *wc);
/* page_size is read by pp_init_ctx() when it sizes the message buffer. */
704 page_size = sysconf(_SC_PAGESIZE);
706 dev_list = ibv_get_device_list(NULL);
708 perror("Failed to get IB devices list");
715 fprintf(stderr, "No IB devices found\n");
/* Look the requested device up by name. */
720 for (i = 0; dev_list[i]; ++i)
721 if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
723 ib_dev = dev_list[i];
725 fprintf(stderr, "IB device %s not found\n", ib_devname);
730 ctx = pp_init_ctx(ib_dev, size, num_qp, rx_depth, ib_port, use_event);
/* Pre-post the full receive depth; routs counts outstanding receives. */
734 routs = pp_post_recv(ctx, ctx->rx_depth);
735 if (routs < ctx->rx_depth) {
736 fprintf(stderr, "Couldn't post receive (%d)\n", routs);
/* Arm the CQ for completion notifications. */
741 if (ibv_req_notify_cq(ctx->cq, 0)) {
742 fprintf(stderr, "Couldn't request CQ notification\n");
/* Zero all MAX_QP entries so unused records are sent as zeros. */
746 memset(my_dest, 0, sizeof my_dest);
748 if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
749 fprintf(stderr, "Couldn't get port info\n");
/* Fill in the local address record for each QP in use. */
752 for (i = 0; i < num_qp; ++i) {
753 my_dest[i].qpn = ctx->qp[i]->qp_num;
/* Random initial PSN, masked to 24 bits. */
754 my_dest[i].psn = lrand48() & 0xffffff;
755 my_dest[i].lid = ctx->portinfo.lid;
/* On an InfiniBand link layer a zero LID is treated as an error. */
756 if (ctx->portinfo.link_layer == IBV_LINK_LAYER_INFINIBAND && !my_dest[i].lid) {
757 fprintf(stderr, "Couldn't get local LID\n");
/* Either query the GID at index gidx or leave the GID all-zero. */
762 if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest[i].gid)) {
763 fprintf(stderr, "Could not get local gid for gid index %d\n", gidx);
767 memset(&my_dest[i].gid, 0, sizeof my_dest[i].gid);
/* The 16-byte GID is printed using IPv6 address notation. */
769 inet_ntop(AF_INET6, &my_dest[i].gid, gid, sizeof gid);
770 printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
771 my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn, gid);
/* Exchange addresses over TCP, as client or as server. */
775 rem_dest = pp_client_exch_dest(servername, port, my_dest);
777 rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl, my_dest, gidx);
/*
 * NOTE(review): this conversion's output is overwritten by the per-QP call in
 * the loop below before any visible line prints it -- looks redundant;
 * confirm.
 */
782 inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid);
784 for (i = 0; i < num_qp; ++i) {
785 inet_ntop(AF_INET6, &rem_dest[i].gid, gid, sizeof gid);
786 printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
787 rem_dest[i].lid, rem_dest[i].qpn, rem_dest[i].psn, gid);
/*
 * pp_server_exch_dest() already calls pp_connect_ctx() itself, so this call
 * is presumably on the client path only -- the guarding condition is not
 * visible in this listing.
 */
791 if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest, gidx))
/* One branch posts the first send on every QP and waits for both completions. */
795 for (i = 0; i < num_qp; ++i) {
796 if (pp_post_send(ctx, i)) {
797 fprintf(stderr, "Couldn't post send\n");
800 ctx->pending[i] = PINGPONG_SEND_WRID | PINGPONG_RECV_WRID;
/* The other branch starts each QP waiting only for a receive. */
803 for (i = 0; i < num_qp; ++i)
804 ctx->pending[i] = PINGPONG_RECV_WRID;
806 if (gettimeofday(&start, NULL)) {
807 perror("gettimeofday");
/* Main loop: run until both counters reach iters. */
812 while (rcnt < iters || scnt < iters) {
814 struct ibv_cq *ev_cq;
/* Event mode: wait for a CQ event, check it is for our CQ, then re-arm. */
817 if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
818 fprintf(stderr, "Failed to get cq_event\n");
824 if (ev_cq != ctx->cq) {
825 fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
829 if (ibv_req_notify_cq(ctx->cq, 0)) {
830 fprintf(stderr, "Couldn't request CQ notification\n");
/* Reap completions; in polling mode, spin until at least one arrives. */
839 ne = ibv_poll_cq(ctx->cq, num_wc, wc);
841 fprintf(stderr, "poll CQ failed %d\n", ne);
844 } while (!use_event && ne < 1);
846 for (i = 0; i < ne; ++i) {
847 if (wc[i].status != IBV_WC_SUCCESS) {
848 fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
849 ibv_wc_status_str(wc[i].status),
850 wc[i].status, (int) wc[i].wr_id);
/* Map the completion's QP number back to an index into ctx->qp[]. */
854 qp_ind = find_qp(wc[i].qp_num, ctx, num_qp);
856 fprintf(stderr, "Couldn't find QPN %06x\n",
861 switch ((int) wc[i].wr_id) {
862 case PINGPONG_SEND_WRID:
866 case PINGPONG_RECV_WRID:
/* Top the SRQ back up to rx_depth once outstanding receives drop to num_qp. */
867 if (--routs <= num_qp) {
868 routs += pp_post_recv(ctx, ctx->rx_depth - routs);
869 if (routs < ctx->rx_depth) {
871 "Couldn't post receive (%d)\n",
881 fprintf(stderr, "Completion for unknown wr_id %d\n",
/* Clear the completed operation's bit; with none left, post the next send. */
886 ctx->pending[qp_ind] &= ~(int) wc[i].wr_id;
887 if (scnt < iters && !ctx->pending[qp_ind]) {
888 if (pp_post_send(ctx, qp_ind)) {
889 fprintf(stderr, "Couldn't post send\n");
892 ctx->pending[qp_ind] = PINGPONG_RECV_WRID |
900 if (gettimeofday(&end, NULL)) {
901 perror("gettimeofday");
/*
 * Elapsed time in microseconds.
 * NOTE(review): usec is a float, so long runs lose microsecond precision, and
 * the seconds term is multiplied by an int constant -- consider double.
 */
906 float usec = (end.tv_sec - start.tv_sec) * 1000000 +
907 (end.tv_usec - start.tv_usec);
/* Total payload: size bytes per message, iters messages, both directions. */
908 long long bytes = (long long) size * iters * 2;
/* bits per microsecond is numerically equal to Mbit/sec. */
910 printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
911 bytes, usec / 1000000., bytes * 8. / usec);
912 printf("%d iters in %.2f seconds = %.2f usec/iter\n",
913 iters, usec / 1000000., usec / iters);
/* Acknowledge the CQ events counted in num_cq_events before teardown. */
916 ibv_ack_cq_events(ctx->cq, num_cq_events);
918 if (pp_close_ctx(ctx, num_qp))
921 ibv_free_device_list(dev_list);