2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39 #include <sys/types.h>
40 #include <sys/socket.h>
45 #include <arpa/inet.h>
/*
 * Work-request IDs. main() switches on wr_id to tell receive completions
 * from send completions, and also ORs and clears these values as bits in
 * ctx->pending[].
 */
51 PINGPONG_RECV_WRID = 1,
52 PINGPONG_SEND_WRID = 2,
/*
 * State shared by the helpers in this file: the verbs objects set up by
 * pp_init_ctx() and released by pp_close_ctx().
 */
59 struct pingpong_context {
/* Device context returned by ibv_open_device(). */
60 struct ibv_context *context;
/* Completion channel returned by ibv_create_comp_channel(). */
61 struct ibv_comp_channel *channel;
/* Queue pairs; pp_init_ctx() fills the first num_qp entries. */
66 struct ibv_qp *qp[MAX_QP];
/* Port attributes; main() fills this in through pp_get_port_info(). */
73 struct ibv_port_attr portinfo;
/*
 * Address of one queue pair as exchanged with the peer. The rest of the file
 * accesses its lid, qpn, psn and gid members.
 */
76 struct pingpong_dest {
/*
 * Bring every queue pair in ctx to the RTR state and then to the RTS state.
 *
 * dest[i] supplies the remote QP number and the receive PSN for QP i;
 * my_dest[i] supplies the local send PSN. sgid_idx is stored as the source
 * GID index when a global route is configured. A message naming the failing
 * QP is printed to stderr when ibv_modify_qp() reports an error. Callers
 * treat a non-zero return value as failure.
 */
83 static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu,
84 int sl, const struct pingpong_dest *my_dest,
85 const struct pingpong_dest *dest, int sgid_idx)
89 for (i = 0; i < ctx->num_qp; ++i) {
/* First transition: ready-to-receive, addressed at the i-th remote QP. */
90 struct ibv_qp_attr attr = {
91 .qp_state = IBV_QPS_RTR,
93 .dest_qp_num = dest[i].qpn,
94 .rq_psn = dest[i].psn,
95 .max_dest_rd_atomic = 1,
/*
 * A non-zero interface_id in the peer GID selects a global route (GRH).
 * NOTE(review): this reads dest->gid, i.e. element 0, on every iteration
 * while the neighbouring fields use dest[i] -- confirm that all entries
 * are expected to carry the same GID.
 */
106 if (dest->gid.global.interface_id) {
107 attr.ah_attr.is_global = 1;
108 attr.ah_attr.grh.hop_limit = 1;
109 attr.ah_attr.grh.dgid = dest->gid;
110 attr.ah_attr.grh.sgid_index = sgid_idx;
112 if (ibv_modify_qp(ctx->qp[i], &attr,
118 IBV_QP_MAX_DEST_RD_ATOMIC |
119 IBV_QP_MIN_RNR_TIMER)) {
120 fprintf(stderr, "Failed to modify QP[%d] to RTR\n", i);
/* Second transition: the same attr structure is reused for ready-to-send. */
124 attr.qp_state = IBV_QPS_RTS;
128 attr.sq_psn = my_dest[i].psn;
129 attr.max_rd_atomic = 1;
130 if (ibv_modify_qp(ctx->qp[i], &attr,
136 IBV_QP_MAX_QP_RD_ATOMIC)) {
137 fprintf(stderr, "Failed to modify QP[%d] to RTS\n", i);
/*
 * Client half of the address exchange carried over a stream socket.
 *
 * Resolves servername and port with getaddrinfo(), connects to the first
 * address that accepts the connection, sends one text record per entry of
 * my_dest[], then reads the same number of records from the peer into a
 * freshly malloc'd array of MAX_QP entries, and finally sends "done".
 * main() stores the result in its rem_dest pointer; the local rem_dest
 * starts out as NULL.
 */
145 static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
146 const struct pingpong_dest *my_dest)
148 struct addrinfo *res, *t;
149 struct addrinfo hints = {
150 .ai_family = AF_UNSPEC,
151 .ai_socktype = SOCK_STREAM
/* One wire record: "lid:qpn:psn:gid" in hex, sized from a sample literal. */
154 char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
159 struct pingpong_dest *rem_dest = NULL;
/* getaddrinfo() takes the service as a string, so format the port number. */
162 if (asprintf(&service, "%d", port) < 0)
165 n = getaddrinfo(servername, service, &hints, &res);
168 fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
/* Try each returned address in turn. */
173 for (t = res; t; t = t->ai_next) {
174 sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
176 if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
183 freeaddrinfo_null(res);
187 fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
/* All MAX_QP slots are sent, not only the ones in use. */
191 for (i = 0; i < MAX_QP; ++i) {
192 gid_to_wire_gid(&my_dest[i].gid, gid);
193 sprintf(msg, "%04x:%06x:%06x:%s", my_dest[i].lid,
194 my_dest[i].qpn, my_dest[i].psn, gid);
/* The whole buffer is written, including the terminating NUL. */
195 if (write(sockfd, msg, sizeof msg) != sizeof msg) {
196 fprintf(stderr, "Couldn't send local address\n");
201 rem_dest = malloc(MAX_QP * sizeof *rem_dest);
205 for (i = 0; i < MAX_QP; ++i) {
/* Keep reading until a full record has arrived. */
207 while (n < sizeof msg) {
208 r = read(sockfd, msg + n, sizeof msg - n);
210 perror("client read");
211 fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
212 n, (int) sizeof msg, i);
/*
 * NOTE(review): the sscanf() result is discarded and %s carries no field
 * width, so a malformed record from the peer is not detected here.
 */
218 sscanf(msg, "%x:%x:%x:%s", &rem_dest[i].lid, &rem_dest[i].qpn,
219 &rem_dest[i].psn, gid);
220 wire_gid_to_gid(gid, &rem_dest[i].gid);
/* Final acknowledgement: "done" plus its terminating NUL. */
223 if (write(sockfd, "done", sizeof "done") != sizeof "done") {
224 perror("client write");
/*
 * Server half of the address exchange carried over a stream socket.
 *
 * Binds a socket for the given port, accepts a connection, reads MAX_QP
 * address records from the peer into a freshly malloc'd array, connects the
 * local QPs to them with pp_connect_ctx(), sends the local records from
 * my_dest[] back, and then reads the peer's final message. main() stores the
 * result in its rem_dest pointer; the local rem_dest starts out as NULL.
 */
232 static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
233 int ib_port, enum ibv_mtu mtu,
235 const struct pingpong_dest *my_dest,
238 struct addrinfo *res, *t;
/* Passive lookup restricted to AF_INET (the client side uses AF_UNSPEC). */
239 struct addrinfo hints = {
240 .ai_flags = AI_PASSIVE,
241 .ai_family = AF_INET,
242 .ai_socktype = SOCK_STREAM
/* One wire record: "lid:qpn:psn:gid" in hex, sized from a sample literal. */
245 char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
249 int sockfd = -1, connfd;
250 struct pingpong_dest *rem_dest = NULL;
/* getaddrinfo() takes the service as a string, so format the port number. */
253 if (asprintf(&service, "%d", port) < 0)
256 n = getaddrinfo(NULL, service, &hints, &res);
259 fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
/* Try each returned address in turn. */
264 for (t = res; t; t = t->ai_next) {
265 sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
/* SO_REUSEADDR is set with n as the option value; the result is ignored. */
269 setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
271 if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
278 freeaddrinfo_null(res);
282 fprintf(stderr, "Couldn't listen to port %d\n", port);
287 connfd = accept(sockfd, NULL, NULL);
290 fprintf(stderr, "accept() failed\n");
294 rem_dest = malloc(MAX_QP * sizeof *rem_dest);
298 for (i = 0; i < MAX_QP; ++i) {
/* Keep reading until a full record has arrived. */
300 while (n < sizeof msg) {
301 r = read(connfd, msg + n, sizeof msg - n);
303 perror("server read");
304 fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
305 n, (int) sizeof msg, i);
/*
 * NOTE(review): the sscanf() result is discarded and %s carries no field
 * width, so a malformed record from the peer is not detected here.
 */
311 sscanf(msg, "%x:%x:%x:%s", &rem_dest[i].lid, &rem_dest[i].qpn,
312 &rem_dest[i].psn, gid);
313 wire_gid_to_gid(gid, &rem_dest[i].gid);
/* The server connects its QPs before replying with its own addresses. */
316 if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest,
318 fprintf(stderr, "Couldn't connect to remote QP\n");
/* All MAX_QP slots are sent, not only the ones in use. */
324 for (i = 0; i < MAX_QP; ++i) {
325 gid_to_wire_gid(&my_dest[i].gid, gid);
326 sprintf(msg, "%04x:%06x:%06x:%s", my_dest[i].lid,
327 my_dest[i].qpn, my_dest[i].psn, gid);
328 if (write(connfd, msg, sizeof msg) != sizeof msg) {
329 fprintf(stderr, "Couldn't send local address\n");
/*
 * Wait for the peer's final message; exactly sizeof "done" bytes are
 * expected from a single read().
 * NOTE(review): this is a read on the server side, yet the text passed to
 * perror() below says "client write".
 */
336 if (read(connfd, msg, sizeof msg) != sizeof "done") {
337 perror("client write");
/*
 * Allocate a pingpong_context and create the verbs objects it refers to.
 *
 * In order: a zeroed context structure, a zeroed buffer of size bytes
 * aligned to page_size, the device context, a completion channel, a
 * protection domain, a memory region covering the buffer, a completion
 * queue, a shared receive queue, and num_qp RC queue pairs, each of which is
 * then moved to the INIT state. Each failure prints a message to stderr; the
 * calls at the end of the function release the objects in the reverse order
 * of their creation.
 */
348 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
349 int num_qp, int rx_depth, int port,
352 struct pingpong_context *ctx;
355 ctx = calloc(1, sizeof *ctx);
360 ctx->send_flags = IBV_SEND_SIGNALED;
361 ctx->num_qp = num_qp;
362 ctx->rx_depth = rx_depth;
364 ctx->buf = memalign(page_size, size);
366 fprintf(stderr, "Couldn't allocate work buf.\n");
370 memset(ctx->buf, 0, size);
372 ctx->context = ibv_open_device(ib_dev);
374 fprintf(stderr, "Couldn't get context for %s\n",
375 ibv_get_device_name(ib_dev));
380 ctx->channel = ibv_create_comp_channel(ctx->context);
382 fprintf(stderr, "Couldn't create completion channel\n");
388 ctx->pd = ibv_alloc_pd(ctx->context);
390 fprintf(stderr, "Couldn't allocate PD\n");
391 goto clean_comp_channel;
/* The buffer is registered with local write access only. */
394 ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE);
396 fprintf(stderr, "Couldn't register MR\n");
/* The CQ is sized for rx_depth receives plus one entry per QP. */
400 ctx->cq = ibv_create_cq(ctx->context, rx_depth + num_qp, NULL,
403 fprintf(stderr, "Couldn't create CQ\n");
408 struct ibv_srq_init_attr attr = {
415 ctx->srq = ibv_create_srq(ctx->pd, &attr);
417 fprintf(stderr, "Couldn't create SRQ\n");
422 for (i = 0; i < num_qp; ++i) {
423 struct ibv_qp_attr attr;
424 struct ibv_qp_init_attr init_attr = {
432 .qp_type = IBV_QPT_RC
435 ctx->qp[i] = ibv_create_qp(ctx->pd, &init_attr);
437 fprintf(stderr, "Couldn't create QP[%d]\n", i);
/*
 * Query the QP capabilities and send inline when the message fits.
 * NOTE(review): the ibv_query_qp() return value is not checked.
 */
440 ibv_query_qp(ctx->qp[i], &attr, IBV_QP_CAP, &init_attr);
441 if (init_attr.cap.max_inline_data >= size) {
442 ctx->send_flags |= IBV_SEND_INLINE;
446 for (i = 0; i < num_qp; ++i) {
447 struct ibv_qp_attr attr = {
448 .qp_state = IBV_QPS_INIT,
454 if (ibv_modify_qp(ctx->qp[i], &attr,
458 IBV_QP_ACCESS_FLAGS)) {
459 fprintf(stderr, "Failed to modify QP[%d] to INIT\n", i);
/* Destroy the QPs below index i, walking backwards. */
470 for (--i; i >= 0; --i)
471 ibv_destroy_qp(ctx->qp[i]);
473 ibv_destroy_srq(ctx->srq);
476 ibv_destroy_cq(ctx->cq);
479 ibv_dereg_mr(ctx->mr);
482 ibv_dealloc_pd(ctx->pd);
486 ibv_destroy_comp_channel(ctx->channel);
489 ibv_close_device(ctx->context);
/*
 * Release the verbs objects held by ctx: the first num_qp queue pairs, then
 * the SRQ, CQ, memory region, protection domain, completion channel and
 * device context. A message naming the object is printed to stderr when a
 * release call reports failure. main() treats a non-zero return value as
 * failure.
 */
500 static int pp_close_ctx(struct pingpong_context *ctx, int num_qp)
504 for (i = 0; i < num_qp; ++i) {
505 if (ibv_destroy_qp(ctx->qp[i])) {
506 fprintf(stderr, "Couldn't destroy QP[%d]\n", i);
511 if (ibv_destroy_srq(ctx->srq)) {
512 fprintf(stderr, "Couldn't destroy SRQ\n");
516 if (ibv_destroy_cq(ctx->cq)) {
517 fprintf(stderr, "Couldn't destroy CQ\n");
521 if (ibv_dereg_mr(ctx->mr)) {
522 fprintf(stderr, "Couldn't deregister MR\n");
526 if (ibv_dealloc_pd(ctx->pd)) {
527 fprintf(stderr, "Couldn't deallocate PD\n");
532 if (ibv_destroy_comp_channel(ctx->channel)) {
533 fprintf(stderr, "Couldn't destroy completion channel\n");
538 if (ibv_close_device(ctx->context)) {
539 fprintf(stderr, "Couldn't release context\n");
/*
 * Post up to n receive work requests to the shared receive queue of ctx.
 * The same work request, tagged PINGPONG_RECV_WRID and pointing at ctx->buf,
 * is posted on every iteration. main() uses the return value as the number
 * of receives that were posted.
 */
549 static int pp_post_recv(struct pingpong_context *ctx, int n)
/* Scatter entry: the registered buffer and the local key of its MR. */
551 struct ibv_sge list = {
552 .addr = (uintptr_t) ctx->buf,
554 .lkey = ctx->mr->lkey
556 struct ibv_recv_wr wr = {
557 .wr_id = PINGPONG_RECV_WRID,
561 struct ibv_recv_wr *bad_wr;
564 for (i = 0; i < n; ++i)
565 if (ibv_post_srq_recv(ctx->srq, &wr, &bad_wr))
/*
 * Post one IBV_WR_SEND work request on ctx->qp[qp_index]. The request is
 * tagged PINGPONG_SEND_WRID, points at ctx->buf and uses ctx->send_flags.
 * Returns the result of ibv_post_send().
 */
571 static int pp_post_send(struct pingpong_context *ctx, int qp_index)
/* Gather entry: the registered buffer and the local key of its MR. */
573 struct ibv_sge list = {
574 .addr = (uintptr_t) ctx->buf,
576 .lkey = ctx->mr->lkey
578 struct ibv_send_wr wr = {
579 .wr_id = PINGPONG_SEND_WRID,
582 .opcode = IBV_WR_SEND,
583 .send_flags = ctx->send_flags,
585 struct ibv_send_wr *bad_wr;
587 return ibv_post_send(ctx->qp[qp_index], &wr, &bad_wr);
/*
 * Linear search over the first num_qp queue pairs of ctx for the one whose
 * qp_num equals qpn. main() uses the result as an index into ctx->pending[].
 */
590 static int find_qp(int qpn, struct pingpong_context *ctx, int num_qp)
594 for (i = 0; i < num_qp; ++i)
595 if (ctx->qp[i]->qp_num == qpn)
/*
 * Print the command-line help to standard output. argv0 is substituted as
 * the program name in the two invocation examples.
 */
601 static void usage(const char *argv0)
604 printf(" %s start a server and wait for connection\n", argv0);
605 printf(" %s <host> connect to server at <host>\n", argv0);
607 printf("Options:\n");
608 printf(" -p, --port=<port> listen on/connect to port <port> (default 18515)\n");
609 printf(" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n");
610 printf(" -i, --ib-port=<port> use port <port> of IB device (default 1)\n");
611 printf(" -s, --size=<size> size of message to exchange (default 4096)\n");
612 printf(" -m, --mtu=<size> path MTU (default 1024)\n");
613 printf(" -q, --num-qp=<num> number of QPs to use (default 16)\n");
614 printf(" -r, --rx-depth=<dep> number of receives to post at a time (default 500)\n");
615 printf(" -n, --iters=<iters> number of exchanges per QP(default 1000)\n");
616 printf(" -l, --sl=<sl> service level value\n");
617 printf(" -e, --events sleep on CQ events (default poll)\n");
618 printf(" -g, --gid-idx=<gid index> local port gid index\n");
/*
 * Program entry point: parse the options, set up the verbs context, exchange
 * QP addresses with the peer over a socket, run the send/receive loop until
 * both counters reach iters, print the throughput figures and release the
 * resources.
 */
621 int main(int argc, char *argv[])
623 struct ibv_device **dev_list;
624 struct ibv_device *ib_dev;
626 struct pingpong_context *ctx;
627 struct pingpong_dest my_dest[MAX_QP];
628 struct pingpong_dest *rem_dest;
629 struct timeval start, end;
630 char *ib_devname = NULL;
631 char *servername = NULL;
/* Defaults; these match the values advertised by usage(). */
632 unsigned int port = 18515;
634 unsigned int size = 4096;
635 enum ibv_mtu mtu = IBV_MTU_1024;
636 unsigned int num_qp = 16;
637 unsigned int rx_depth = 500;
638 unsigned int iters = 1000;
644 int num_cq_events = 0;
/* Seed the generator behind lrand48(), which supplies the PSNs below. */
649 srand48(getpid() * time(NULL));
654 static struct option long_options[] = {
655 { .name = "port", .has_arg = 1, .val = 'p' },
656 { .name = "ib-dev", .has_arg = 1, .val = 'd' },
657 { .name = "ib-port", .has_arg = 1, .val = 'i' },
658 { .name = "size", .has_arg = 1, .val = 's' },
659 { .name = "mtu", .has_arg = 1, .val = 'm' },
660 { .name = "num-qp", .has_arg = 1, .val = 'q' },
661 { .name = "rx-depth", .has_arg = 1, .val = 'r' },
662 { .name = "iters", .has_arg = 1, .val = 'n' },
663 { .name = "sl", .has_arg = 1, .val = 'l' },
664 { .name = "events", .has_arg = 0, .val = 'e' },
665 { .name = "gid-idx", .has_arg = 1, .val = 'g' },
669 c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:",
676 port = strtoul(optarg, NULL, 0);
684 ib_devname = strdupa(optarg);
688 ib_port = strtol(optarg, NULL, 0);
696 size = strtoul(optarg, NULL, 0);
704 mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0));
/*
 * NOTE(review): num_qp later indexes my_dest[MAX_QP] and ctx->qp[MAX_QP];
 * no check against MAX_QP is visible in this function -- confirm that
 * the value is bounded somewhere.
 */
712 num_qp = strtoul(optarg, NULL, 0);
716 rx_depth = strtoul(optarg, NULL, 0);
720 iters = strtoul(optarg, NULL, 0);
724 sl = strtol(optarg, NULL, 0);
732 gidx = strtol(optarg, NULL, 0);
/* Exactly one positional argument is taken as the server name. */
741 if (optind == argc - 1)
742 servername = strdupa(argv[optind]);
743 else if (optind < argc) {
748 if (num_qp > rx_depth) {
749 fprintf(stderr, "rx_depth %d is too small for %d QPs -- "
750 "must have at least one receive per QP.\n",
/* The completion array holds num_qp + rx_depth entries on the stack. */
755 num_wc = num_qp + rx_depth;
756 wc = alloca(num_wc * sizeof *wc);
758 page_size = sysconf(_SC_PAGESIZE);
760 dev_list = ibv_get_device_list(NULL);
762 perror("Failed to get IB devices list");
769 fprintf(stderr, "No IB devices found\n");
/* Look the requested device up by name. */
773 for (i = 0; dev_list[i]; ++i)
774 if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
776 ib_dev = dev_list[i];
778 fprintf(stderr, "IB device %s not found\n", ib_devname);
783 ctx = pp_init_ctx(ib_dev, size, num_qp, rx_depth, ib_port, use_event);
/* routs tracks the number of receives currently posted. */
787 routs = pp_post_recv(ctx, ctx->rx_depth);
788 if (routs < ctx->rx_depth) {
789 fprintf(stderr, "Couldn't post receive (%d)\n", routs);
794 if (ibv_req_notify_cq(ctx->cq, 0)) {
795 fprintf(stderr, "Couldn't request CQ notification\n");
/* Unused slots stay zeroed; the exchange helpers send all MAX_QP slots. */
799 memset(my_dest, 0, sizeof my_dest);
801 if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
802 fprintf(stderr, "Couldn't get port info\n");
/* Describe each local QP: its number, a random 24-bit PSN, the port LID. */
805 for (i = 0; i < num_qp; ++i) {
806 my_dest[i].qpn = ctx->qp[i]->qp_num;
807 my_dest[i].psn = lrand48() & 0xffffff;
808 my_dest[i].lid = ctx->portinfo.lid;
/* A zero LID is rejected unless the link layer is Ethernet. */
809 if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET
810 && !my_dest[i].lid) {
811 fprintf(stderr, "Couldn't get local LID\n");
816 if (ibv_query_gid(ctx->context, ib_port, gidx,
818 fprintf(stderr, "Could not get local gid for "
819 "gid index %d\n", gidx);
823 memset(&my_dest[i].gid, 0, sizeof my_dest[i].gid);
825 inet_ntop(AF_INET6, &my_dest[i].gid, gid, sizeof gid);
826 printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, "
827 "GID %s\n", my_dest[i].lid, my_dest[i].qpn,
828 my_dest[i].psn, gid);
/* Exchange addresses with the peer over the socket. */
832 rem_dest = pp_client_exch_dest(servername, port, my_dest);
834 rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl,
840 inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid);
842 for (i = 0; i < num_qp; ++i) {
843 inet_ntop(AF_INET6, &rem_dest[i].gid, gid, sizeof gid);
844 printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, "
845 "GID %s\n", rem_dest[i].lid, rem_dest[i].qpn,
846 rem_dest[i].psn, gid);
/* pp_server_exch_dest() connects the QPs itself; this call covers the */
/* path where that has not happened yet (presumably the client -- confirm). */
850 if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest,
/* pending[i] holds the WRID bits still outstanding for QP i. */
855 for (i = 0; i < num_qp; ++i) {
856 if (pp_post_send(ctx, i)) {
857 fprintf(stderr, "Couldn't post send\n");
860 ctx->pending[i] = PINGPONG_SEND_WRID | PINGPONG_RECV_WRID;
863 for (i = 0; i < num_qp; ++i)
864 ctx->pending[i] = PINGPONG_RECV_WRID;
866 if (gettimeofday(&start, NULL)) {
867 perror("gettimeofday");
/* Run until both the receive and the send counters reach iters. */
872 while (rcnt < iters || scnt < iters) {
874 struct ibv_cq *ev_cq;
877 if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
878 fprintf(stderr, "Failed to get cq_event\n");
884 if (ev_cq != ctx->cq) {
885 fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
/* Re-arm the notification before draining the CQ. */
889 if (ibv_req_notify_cq(ctx->cq, 0)) {
890 fprintf(stderr, "Couldn't request CQ notification\n");
899 ne = ibv_poll_cq(ctx->cq, num_wc, wc);
901 fprintf(stderr, "poll CQ failed %d\n", ne);
/* Without events, poll again until at least one completion is returned. */
904 } while (!use_event && ne < 1);
906 for (i = 0; i < ne; ++i) {
907 if (wc[i].status != IBV_WC_SUCCESS) {
908 fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
909 ibv_wc_status_str(wc[i].status),
910 wc[i].status, (int) wc[i].wr_id);
/* Map the completion back to the index of the QP it belongs to. */
914 qp_ind = find_qp(wc[i].qp_num, ctx, num_qp);
916 fprintf(stderr, "Couldn't find QPN %06x\n",
921 switch ((int) wc[i].wr_id) {
922 case PINGPONG_SEND_WRID:
926 case PINGPONG_RECV_WRID:
/* Top the SRQ back up to rx_depth once num_qp or fewer remain posted. */
927 if (--routs <= num_qp) {
928 routs += pp_post_recv(ctx, ctx->rx_depth - routs);
929 if (routs < ctx->rx_depth) {
931 "Couldn't post receive (%d)\n",
941 fprintf(stderr, "Completion for unknown wr_id %d\n",
/* Clear the completed bit; post the next send once nothing is pending. */
946 ctx->pending[qp_ind] &= ~(int) wc[i].wr_id;
947 if (scnt < iters && !ctx->pending[qp_ind]) {
948 if (pp_post_send(ctx, qp_ind)) {
949 fprintf(stderr, "Couldn't post send\n");
952 ctx->pending[qp_ind] = PINGPONG_RECV_WRID |
960 if (gettimeofday(&end, NULL)) {
961 perror("gettimeofday");
/*
 * Elapsed time in microseconds; the byte count is size * iters * 2.
 * NOTE(review): the elapsed time is stored in a float, which keeps only
 * about seven significant decimal digits -- long runs lose resolution.
 */
966 float usec = (end.tv_sec - start.tv_sec) * 1000000 +
967 (end.tv_usec - start.tv_usec);
968 long long bytes = (long long) size * iters * 2;
970 printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
971 bytes, usec / 1000000., bytes * 8. / usec);
972 printf("%d iters in %.2f seconds = %.2f usec/iter\n",
973 iters, usec / 1000000., usec / iters);
/* Acknowledge the CQ events counted in num_cq_events. */
976 ibv_ack_cq_events(ctx->cq, num_cq_events);
978 if (pp_close_ctx(ctx, num_qp))
981 ibv_free_device_list(dev_list);