2 * Copyright (c) 2005 Robert N. M. Watson
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/types.h>
30 #include <sys/socket.h>
31 #include <sys/stdint.h>
33 #include <sys/utsname.h>
36 #include <netinet/in.h>
48 * juggle is a simple IPC/context switch performance test, which works on
49 * pairs of file descriptors of various types. In various runs, it considers
50 * the cost of bouncing a message synchronously across the descriptor pair,
51 * either in the same thread, two different threads, or two different
52 * processes. Timing measurements for each series of I/O's are reported, but
53 * the first measurement in each series is discarded as "warmup" on the IPC
54 * primitive. Variations on the test permit for pipelining, or the insertion
55 * of more than one packet into the stream at a time, intended to permit
56 * greater parallelism, hopefully allowing performance numbers to reflect
57 * use of available parallelism, and/or intelligence in context switching to
58 * avoid premature switching when multiple messages are queued.
62 * The UDP test uses UDP over the loopback interface. Two arbitrary but
 * hopefully unused port numbers are used for the two endpoints.
65 #define UDP_PORT1 2020
66 #define UDP_PORT2 2021
69 * Size of each message. Must be smaller than the socket buffer or pipe
70 * buffer maximum size, as we want to send it atomically without blocking.
71 * If pipelining is in use, must be able to fit PIPELINE_MAX of these
72 * messages into the send queue.
74 #define MESSAGELEN 128
77 * Number of message cycles -- into fd1, out of fd2, into fd2, and out of
78 * fd1. By counting in cycles, we allow the master thread or process to
79 * perform timing without explicitly synchronizing with the secondary thread
82 #define NUMCYCLES 1024
85 * Number of times to run each test.
90 * Number of in-flight messages per cycle. In adjusting this value, be
91 * careful not to exceed the socket/etc buffer depth, or messages may be lost
92 * or result in blocking.
94 #define PIPELINE_MAX 4
97 udp_create(int *fd1p, int *fd2p)
99 struct sockaddr_in sin1, sin2;
102 sock1 = socket(PF_INET, SOCK_DGRAM, 0);
106 sock2 = socket(PF_INET, SOCK_DGRAM, 0);
112 bzero(&sin1, sizeof(sin1));
113 sin1.sin_len = sizeof(sin1);
114 sin1.sin_family = AF_INET;
115 sin1.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
116 sin1.sin_port = htons(UDP_PORT1);
118 bzero(&sin2, sizeof(sin2));
119 sin2.sin_len = sizeof(sin2);
120 sin2.sin_family = AF_INET;
121 sin2.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
122 sin2.sin_port = htons(UDP_PORT2);
124 if (bind(sock1, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
130 if (bind(sock2, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
136 if (connect(sock1, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
142 if (connect(sock2, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
/*
 * Produce a descriptor pair for the "pipe" IPC type -- presumably via
 * pipe(2); the body is not visible in this excerpt, so verify against the
 * full source.
 */
155 pipe_create(int *fd1p, int *fd2p)
/*
 * Produce a connected pair of local-domain datagram sockets via
 * socketpair(2).  NOTE(review): the return type, error return, and the
 * assignment of fds[0]/fds[1] to *fd1p/*fd2p fall on lines not shown in
 * this excerpt.
 */
169 socketpairdgram_create(int *fd1p, int *fd2p)
173 	if (socketpair(PF_LOCAL, SOCK_DGRAM, 0, fds) < 0)
/*
 * Produce a connected pair of local-domain stream sockets via
 * socketpair(2).  NOTE(review): the return type, error return, and the
 * assignment of fds[0]/fds[1] to *fd1p/*fd2p fall on lines not shown in
 * this excerpt.
 */
183 socketpairstream_create(int *fd1p, int *fd2p)
187 	if (socketpair(PF_LOCAL, SOCK_STREAM, 0, fds) < 0)
/*
 * message_send() body fragment: write one zero-filled MESSAGELEN-byte
 * message to a descriptor.  NOTE(review): the function signature, the
 * declaration of 'len', and the error/return paths are on lines missing
 * from this excerpt.
 */
199 	u_char buffer[MESSAGELEN];
202 	bzero(buffer, sizeof(buffer));
204 	len = write(s, buffer, sizeof(buffer));
	/* A short write would desynchronize the message-bouncing protocol. */
207 	if (len != sizeof(buffer)) {
/*
 * message_recv() body fragment: read one MESSAGELEN-byte message from a
 * descriptor.  NOTE(review): the function signature, the declaration of
 * 'len', and the error/return paths are on lines missing from this
 * excerpt.
 */
217 	u_char buffer[MESSAGELEN];
220 	len = read(s, buffer, sizeof(buffer));
	/* A short read indicates a protocol or buffer-sizing problem. */
223 	if (len != sizeof(buffer)) {
231 * Juggle messages between two file descriptors in a single thread/process,
232 * so simply a measure of IPC performance.
/*
 * Single-threaded juggle: bounce 'pipeline' messages per cycle through the
 * descriptor pair (fd1 -> fd2 -> fd1) for NUMCYCLES cycles, returning the
 * wall-clock time the loop took.  NOTE(review): opening/closing braces,
 * the declarations of i and j, and the final return of tfinish fall on
 * lines not shown in this excerpt.
 */
234 static struct timespec
235 juggle(int fd1, int fd2, int pipeline)
237 	struct timespec tstart, tfinish;
	/* Timestamp before the I/O loop; elapsed time is tfinish - tstart. */
240 	if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
241 		err(-1, "juggle: clock_gettime");
243 	for (i = 0; i < NUMCYCLES; i++) {
		/* Inject 'pipeline' messages into fd1. */
245 		for (j = 0; j < pipeline; j++) {
246 			if (message_send(fd1) < 0)
247 				err(-1, "message_send fd1");
		/* Drain them from fd2 and immediately bounce each one back. */
250 		for (j = 0; j < pipeline; j++) {
251 			if (message_recv(fd2) < 0)
252 				err(-1, "message_recv fd2");
254 			if (message_send(fd2) < 0)
255 				err(-1, "message_send fd2");
		/* Collect the returned messages from fd1, completing the cycle. */
258 		for (j = 0; j < pipeline; j++) {
259 			if (message_recv(fd1) < 0)
260 				err(-1, "message_recv fd1");
264 	if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
265 		err(-1, "juggle: clock_gettime");
	/* In-place subtraction: tfinish now holds the elapsed interval. */
267 	timespecsub(&tfinish, &tstart, &tfinish);
273 * Juggle messages between two file descriptors in two threads, so measure
274 * the cost of IPC and the cost of a thread context switch.
276 * In order to avoid measuring thread creation time, we make use of a
277 * condition variable to decide when both threads are ready to begin
/* Set by the child thread once it is ready to run; read by the parent
 * under threaded_mtx / threaded_cond (see thread_juggle). */
280 static int threaded_child_ready;
281 static pthread_mutex_t threaded_mtx;
282 static pthread_cond_t threaded_cond;
/* Pipeline depth for the current threaded run, published by thread_juggle
 * before thread creation and read by juggling_thread. */
283 static int threaded_pipeline;
/*
 * Child-thread side of the threaded test: announce readiness under
 * threaded_mtx via threaded_cond (so the parent excludes thread-creation
 * time from its measurement), then echo NUMCYCLES * threaded_pipeline
 * messages on its descriptor.  NOTE(review): the return type, the
 * derivation of fd2 from 'arg', the declarations of i and j, and the
 * final return fall on lines not shown in this excerpt.
 */
286 juggling_thread(void *arg)
	/* Signal readiness: set the flag and wake the waiting parent. */
292 	if (pthread_mutex_lock(&threaded_mtx) != 0)
293 		err(-1, "juggling_thread: pthread_mutex_lock");
295 	threaded_child_ready = 1;
297 	if (pthread_cond_signal(&threaded_cond) != 0)
298 		err(-1, "juggling_thread: pthread_cond_signal");
300 	if (pthread_mutex_unlock(&threaded_mtx) != 0)
301 		err(-1, "juggling_thread: pthread_mutex_unlock");
	/* Echo loop: receive each message and immediately send it back. */
303 	for (i = 0; i < NUMCYCLES; i++) {
304 		for (j = 0; j < threaded_pipeline; j++) {
305 			if (message_recv(fd2) < 0)
306 				err(-1, "message_recv fd2");
308 			if (message_send(fd2) < 0)
309 				err(-1, "message_send fd2");
/*
 * Two-thread juggle: spawn juggling_thread as the echo side, wait on the
 * condition variable until it reports ready (so thread startup is not
 * timed), then time NUMCYCLES pipelined send/recv round trips on fd1.
 * NOTE(review): braces, the declarations of thread/i/j, any condition-
 * variable initialization, and the final return of tfinish fall on lines
 * not shown in this excerpt.
 */
316 static struct timespec
317 thread_juggle(int fd1, int fd2, int pipeline)
319 	struct timespec tstart, tfinish;
	/* Publish the pipeline depth before the child thread can read it. */
323 	threaded_pipeline = pipeline;
325 	if (pthread_mutex_init(&threaded_mtx, NULL) != 0)
326 		err(-1, "thread_juggle: pthread_mutex_init");
	/* The child receives a pointer to our fd2 parameter as its argument. */
328 	if (pthread_create(&thread, NULL, juggling_thread, &fd2) != 0)
329 		err(-1, "thread_juggle: pthread_create");
	/* Block until the child sets threaded_child_ready under the mutex. */
331 	if (pthread_mutex_lock(&threaded_mtx) != 0)
332 		err(-1, "thread_juggle: pthread_mutex_lock");
334 	while (!threaded_child_ready) {
335 		if (pthread_cond_wait(&threaded_cond, &threaded_mtx) != 0)
336 			err(-1, "thread_juggle: pthread_cond_wait");
339 	if (pthread_mutex_unlock(&threaded_mtx) != 0)
340 		err(-1, "thread_juggle: pthread_mutex_unlock");
	/* Timed region starts only after the child is known to be running. */
342 	if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
343 		err(-1, "thread_juggle: clock_gettime");
345 	for (i = 0; i < NUMCYCLES; i++) {
346 		for (j = 0; j < pipeline; j++) {
347 			if (message_send(fd1) < 0)
348 				err(-1, "message_send fd1");
351 		for (j = 0; j < pipeline; j++) {
352 			if (message_recv(fd1) < 0)
353 				err(-1, "message_recv fd1");
357 	if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
358 		err(-1, "thread_juggle: clock_gettime");
	/* Reap the echo thread before reporting the elapsed time. */
360 	if (pthread_join(thread, NULL) != 0)
361 		err(-1, "thread_juggle: pthread_join");
363 	timespecsub(&tfinish, &tstart, &tfinish);
369 * Juggle messages between two file descriptors in two processes, so measure
370 * the cost of IPC and the cost of a process context switch.
372 * Since we can't use a mutex between the processes, we simply do an extra
373 * write on the child to let the parent know that it's ready to start.
/*
 * Two-process juggle: fork a child that echoes messages on fd2; the child
 * first sends one extra message so the parent knows it is running (no
 * shared mutex is available across the fork -- see comment above), and the
 * parent absorbs that message before starting its timed loop.
 * NOTE(review): the fork() call, the pid==0/parent branch structure, the
 * declarations of i and j, intermediate error-path statements, and the
 * final return of tfinish fall on lines not shown in this excerpt.
 */
375 static struct timespec
376 process_juggle(int fd1, int fd2, int pipeline)
378 	struct timespec tstart, tfinish;
379 	pid_t pid, ppid, wpid;
386 		err(-1, "process_juggle: fork");
	/* Child: announce readiness with one extra message on fd2... */
389 		if (message_send(fd2) < 0) {
393 			err(-1, "process_juggle: child: message_send");
	/* ...then echo 'pipeline' messages per cycle for NUMCYCLES cycles. */
396 		for (i = 0; i < NUMCYCLES; i++) {
397 			for (j = 0; j < pipeline; j++) {
398 				if (message_send(fd2) < 0)
399 					err(-1, "message_send fd2");
401 				if (message_recv(fd2) < 0)
402 					err(-1, "message_recv fd2");
	/* Parent: absorb the child's readiness message before timing. */
408 		if (message_recv(fd1) < 0) {
412 			err(-1, "process_juggle: parent: message_recv");
415 		if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
416 			err(-1, "process_juggle: clock_gettime");
418 		for (i = 0; i < NUMCYCLES; i++) {
419 			for (j = 0; j < pipeline; j++) {
420 				if (message_send(fd1) < 0) {
424 					err(-1, "message_send fd1");
428 			for (j = 0; j < pipeline; j++) {
429 				if (message_recv(fd1) < 0) {
433 					err(-1, "message_recv fd1");
438 		if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
439 			err(-1, "process_juggle: clock_gettime");
	/* Reap the child and sanity-check the reaped pid before reporting. */
442 	wpid = waitpid(pid, NULL, 0);
444 		err(-1, "process_juggle: waitpid");
446 		errx(-1, "process_juggle: waitpid: pid != wpid");
448 	timespecsub(&tfinish, &tstart, &tfinish);
454 * When we print out results for larger pipeline sizes, we scale back by the
455 * depth of the pipeline. This generally means dividing by the pipeline
456 * depth. Except when it means dividing by zero.
/*
 * Normalize a measured interval by pipeline depth p -- per the comment
 * above, this divides by p except in the p == 0 case, where division is
 * presumably skipped; the body is not visible in this excerpt, so verify
 * against the full source.
 */
459 scale_timespec(struct timespec *ts, int p)
/*
 * Table of IPC mechanisms under test: each entry pairs a constructor that
 * yields a connected descriptor pair with the label printed in the
 * CSV-style output.  NOTE(review): the it_name member declaration and the
 * array header line fall between the lines shown in this excerpt.
 */
469 static const struct ipctype {
470 	int (*it_create)(int *fd1p, int *fd2p);
473 	{ pipe_create, "pipe" },
474 	{ udp_create, "udp" },
475 	{ socketpairdgram_create, "socketpairdgram" },
476 	{ socketpairstream_create, "socketpairstream" },
/* Number of entries in ipctypes[]. */
478 static const int ipctypes_len = (sizeof(ipctypes) / sizeof(struct ipctype));
/*
 * Driver: print a header describing the host and benchmark parameters,
 * then for every pipeline depth 0..PIPELINE_MAX and every IPC type run one
 * warmup plus LOOPS timed runs of each of the three juggle variants, and
 * emit the per-loop timings as comma-separated rows.  NOTE(review): the
 * uts declaration and uname() call, loop braces, the warmup juggle() call,
 * fputs/printf result formatting lines, descriptor cleanup, and the
 * function's tail all fall on lines not shown in this excerpt.
 */
481 main(int argc, char *argv[])
483 	struct timespec juggle_results[LOOPS], process_results[LOOPS];
484 	struct timespec thread_results[LOOPS];
485 	int fd1, fd2, i, j, p;
486 	printf("version, juggle.c %s\n", "$FreeBSD$");
	/* Host identification for the benchmark log (uts filled by uname(3),
	 * on a line not shown here). */
492 	printf("sysname, %s\n", uts.sysname);
493 	printf("nodename, %s\n", uts.nodename);
494 	printf("release, %s\n", uts.release);
495 	printf("version, %s\n", uts.version);
496 	printf("machine, %s\n", uts.machine);
	/* Record the compile-time benchmark parameters alongside the data. */
499 	printf("MESSAGELEN, %d\n", MESSAGELEN);
500 	printf("NUMCYCLES, %d\n", NUMCYCLES);
501 	printf("LOOPS, %d\n", LOOPS);
502 	printf("PIPELINE_MAX, %d\n", PIPELINE_MAX);
	/* Column headings: one data column per timed loop. */
505 	printf("ipctype, test, pipeline_depth");
506 	for (j = 0; j < LOOPS; j++)
507 		printf(", data%d", j);
	/* p ranges 0..PIPELINE_MAX inclusive; p == 0 is the unpipelined case. */
510 	for (p = 0; p < PIPELINE_MAX + 1; p++) {
511 		for (i = 0; i < ipctypes_len; i++) {
512 			if (ipctypes[i].it_create(&fd1, &fd2) < 0)
513 				err(-1, "main: %s", ipctypes[i].it_name);
516 			 * For each test, do one uncounted warmup, then LOOPS
517 			 * runs of the actual test.
520 			for (j = 0; j < LOOPS; j++)
521 				juggle_results[j] = juggle(fd1, fd2, p);
	/* Bare call = the uncounted warmup run for the process test. */
522 			process_juggle(fd1, fd2, p);
523 			for (j = 0; j < LOOPS; j++)
524 				process_results[j] = process_juggle(fd1, fd2,
526 			thread_juggle(fd1, fd2, p);
527 			for (j = 0; j < LOOPS; j++)
528 				thread_results[j] = thread_juggle(fd1, fd2,
	/*
	 * NOTE(review): thread_results is zeroed immediately after being
	 * collected, so the thread_juggle rows printed below are always
	 * 0.0 as shown -- lines missing from this excerpt may wrap the
	 * thread test in a disabling conditional; confirm intent against
	 * the full source.
	 */
530 			for (j = 0; j < LOOPS; j++) {
531 				thread_results[j].tv_sec = 0;
532 				thread_results[j].tv_nsec = 0;
538 		 * When printing results for the round, normalize the results
539 		 * with respect to the pipeline depth.  We're doing p times
540 		 * as much work, and are we taking p times as long?
542 		for (i = 0; i < ipctypes_len; i++) {
543 			printf("%s, juggle, %d, ", ipctypes[i].it_name, p);
544 			for (j = 0; j < LOOPS; j++) {
547 				scale_timespec(&juggle_results[j], p);
549 				    (intmax_t)juggle_results[j].tv_sec,
550 				    juggle_results[j].tv_nsec);
553 			printf("%s, process_juggle, %d, ",
554 			    ipctypes[i].it_name, p);
555 			for (j = 0; j < LOOPS; j++) {
558 				scale_timespec(&process_results[j], p);
560 				    (intmax_t)process_results[j].tv_sec,
561 				    process_results[j].tv_nsec);
564 			printf("%s, thread_juggle, %d, ",
565 			    ipctypes[i].it_name, p);
566 			for (j = 0; j < LOOPS; j++) {
569 				scale_timespec(&thread_results[j], p);
571 				    (intmax_t)thread_results[j].tv_sec,
572 				    thread_results[j].tv_nsec);