2 * Copyright (c) 2005 Robert N. M. Watson
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/types.h>
30 #include <sys/socket.h>
32 #include <sys/utsname.h>
35 #include <netinet/in.h>
47 * juggle is a simple IPC/context switch performance test, which works on
48 * pairs of file descriptors of various types. In various runs, it considers
49 * the cost of bouncing a message synchronously across the descriptor pair,
50 * either in the same thread, two different threads, or two different
51 * processes. Timing measurements for each series of I/O's are reported, but
52 * the first measurement in each series is discarded as "warmup" on the IPC
53 * primitive. Variations on the test permit for pipelining, or the insertion
54 * of more than one packet into the stream at a time, intended to permit
55 * greater parallelism, hopefully allowing performance numbers to reflect
56 * use of available parallelism, and/or intelligence in context switching to
57 * avoid premature switching when multiple messages are queued.
61 * The UDP test uses UDP over the loopback interface. Two arbitrary but
/*
 * Fixed, hard-coded UDP port numbers; a run will fail if either port is
 * already in use on the host.
 */
64 #define UDP_PORT1 2020
65 #define UDP_PORT2 2021
68 * Size of each message. Must be smaller than the socket buffer or pipe
69 * buffer maximum size, as we want to send it atomically without blocking.
70 * If pipelining is in use, must be able to fit PIPELINE_MAX of these
71 * messages into the send queue.
73 #define MESSAGELEN 128
76 * Number of message cycles -- into fd1, out of fd2, into fd2, and out of
77 * fd1. By counting in cycles, we allow the master thread or process to
78 * perform timing without explicitly synchronizing with the secondary thread
81 #define NUMCYCLES 1024
84 * Number of times to run each test.
/* NOTE(review): the LOOPS definition itself is elided in this view. */
89 * Number of in-flight messages per cycle. When adjusting this value, be
90 * careful not to exceed the socket/etc buffer depth, or messages may be lost
91 * or result in blocking.
93 #define PIPELINE_MAX 4
96 * As in all programs, steal timespecsub() from time.h.
/*
 * timespecsub(vvp, uvp): *vvp -= *uvp in place, borrowing from tv_sec to
 * keep tv_nsec in [0, 1e9). Multi-line macro: do not insert anything
 * between the backslash-continued lines below (its tail is elided here).
 */
98 #define timespecsub(vvp, uvp) \
100 	(vvp)->tv_sec -= (uvp)->tv_sec; \
101 	(vvp)->tv_nsec -= (uvp)->tv_nsec; \
102 	if ((vvp)->tv_nsec < 0) { \
104 		(vvp)->tv_nsec += 1000000000; \
/*
 * udp_create(): build a cross-connected pair of UDP sockets over loopback.
 * sock1 binds UDP_PORT1 and connect(2)s to UDP_PORT2; sock2 does the
 * reverse, so each write on one descriptor is readable on the other.
 * NOTE(review): the socket/bind/connect error-handling bodies, the stores
 * through fd1p/fd2p, and the return are elided in this view.
 */
109 udp_create(int *fd1p, int *fd2p)
111 	struct sockaddr_in sin1, sin2;
114 	sock1 = socket(PF_INET, SOCK_DGRAM, 0);
118 	sock2 = socket(PF_INET, SOCK_DGRAM, 0);
	/* Endpoint for the first socket: 127.0.0.1:UDP_PORT1 (sin_len is BSD). */
124 	bzero(&sin1, sizeof(sin1));
125 	sin1.sin_len = sizeof(sin1);
126 	sin1.sin_family = AF_INET;
127 	sin1.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
128 	sin1.sin_port = htons(UDP_PORT1);
	/* Endpoint for the second socket: 127.0.0.1:UDP_PORT2. */
130 	bzero(&sin2, sizeof(sin2));
131 	sin2.sin_len = sizeof(sin2);
132 	sin2.sin_family = AF_INET;
133 	sin2.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
134 	sin2.sin_port = htons(UDP_PORT2);
136 	if (bind(sock1, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
142 	if (bind(sock2, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
	/* Connect each socket to the other's address so plain write(2) works. */
148 	if (connect(sock1, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
154 	if (connect(sock2, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
/*
 * pipe_create(): produce a descriptor pair for the "pipe" IPC type —
 * presumably via pipe(2); the entire body is elided in this view.
 */
167 pipe_create(int *fd1p, int *fd2p)
/*
 * socketpairdgram_create(): connected local-domain datagram socket pair.
 * NOTE(review): the fds declaration, error path, stores through
 * fd1p/fd2p, and return are elided in this view.
 */
181 socketpairdgram_create(int *fd1p, int *fd2p)
185 	if (socketpair(PF_LOCAL, SOCK_DGRAM, 0, fds) < 0)
/*
 * socketpairstream_create(): connected local-domain stream socket pair;
 * parallel to socketpairdgram_create() but SOCK_STREAM. Body largely
 * elided in this view.
 */
195 socketpairstream_create(int *fd1p, int *fd2p)
199 	if (socketpair(PF_LOCAL, SOCK_STREAM, 0, fds) < 0)
/*
 * message_send(s) body fragment: write one zero-filled MESSAGELEN-byte
 * message to descriptor s. MESSAGELEN is sized so the write is atomic and
 * non-blocking (see the MESSAGELEN comment above its definition).
 * NOTE(review): the function header, len declaration, the write(2)
 * error check, and the short-write handling are elided in this view;
 * the comparison below mixes a signed length with unsigned sizeof.
 */
211 	u_char buffer[MESSAGELEN];
214 	bzero(buffer, sizeof(buffer));
216 	len = write(s, buffer, sizeof(buffer));
219 	if (len != sizeof(buffer)) {
/*
 * message_recv(s) body fragment: read one MESSAGELEN-byte message from
 * descriptor s and check that a full message arrived.
 * NOTE(review): the function header, len declaration, the read(2) error
 * check, and the short-read handling are elided in this view.
 */
229 	u_char buffer[MESSAGELEN];
232 	len = read(s, buffer, sizeof(buffer));
235 	if (len != sizeof(buffer)) {
243 * Juggle messages between two file descriptors in a single thread/process,
244 * so simply a measure of IPC performance.
/*
 * juggle(fd1, fd2, pipeline): NUMCYCLES times, push `pipeline` messages
 * into fd1, bounce them through fd2, and drain them back out of fd1,
 * returning the elapsed time as a struct timespec.
 * NOTE(review): CLOCK_REALTIME is subject to wall-clock adjustment;
 * CLOCK_MONOTONIC would be more robust for interval timing.
 * NOTE(review): loop-closing braces and the return are elided in this view.
 */
246 static struct timespec
247 juggle(int fd1, int fd2, int pipeline)
249 	struct timespec tstart, tfinish;
252 	if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
253 		err(-1, "juggle: clock_gettime");
255 	for (i = 0; i < NUMCYCLES; i++) {
		/* Inject `pipeline` messages into fd1. */
257 		for (j = 0; j < pipeline; j++) {
258 			if (message_send(fd1) < 0)
259 				err(-1, "message_send fd1");
		/* Bounce each message: out of fd2, back into fd2. */
262 		for (j = 0; j < pipeline; j++) {
263 			if (message_recv(fd2) < 0)
264 				err(-1, "message_recv fd2");
266 			if (message_send(fd2) < 0)
267 				err(-1, "message_send fd2");
		/* Drain the returning messages out of fd1. */
270 		for (j = 0; j < pipeline; j++) {
271 			if (message_recv(fd1) < 0)
272 				err(-1, "message_recv fd1");
276 	if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
277 		err(-1, "juggle: clock_gettime");
	/* In-place: tfinish -= tstart; tfinish becomes the elapsed interval. */
279 	timespecsub(&tfinish, &tstart);
285 * Juggle messages between two file descriptors in two threads, so measure
286 * the cost of IPC and the cost of a thread context switch.
288 * In order to avoid measuring thread creation time, we make use of a
289 * condition variable to decide when both threads are ready to begin
/* Set by the child thread under threaded_mtx once it is ready to juggle. */
292 static int threaded_child_ready;
/* Protects threaded_child_ready; pairs with threaded_cond. */
293 static pthread_mutex_t threaded_mtx;
294 static pthread_cond_t threaded_cond;
/* Pipeline depth for the current run, published before thread creation. */
295 static int threaded_pipeline;
/*
 * juggling_thread(): secondary thread for thread_juggle(). Signals
 * readiness via threaded_cond, then echoes NUMCYCLES * threaded_pipeline
 * messages back over fd2.
 * NOTE(review): pthread_mutex_lock/pthread_cond_signal/pthread_mutex_unlock
 * return 0 or a positive error number and never set errno, so these `< 0`
 * checks can never fire, and err(3) would report a stale errno anyway;
 * the idiomatic fix is `if ((error = pthread_...()) != 0)` with
 * strerror(error). Left as-is because surrounding lines (declarations,
 * fd2 = *(int *)arg presumably, and the return) are elided in this view.
 */
298 juggling_thread(void *arg)
304 	if (pthread_mutex_lock(&threaded_mtx) < 0)
305 		err(-1, "juggling_thread: pthread_mutex_lock");
	/* Tell the parent we are ready; it waits on threaded_cond. */
307 	threaded_child_ready = 1;
309 	if (pthread_cond_signal(&threaded_cond) < 0)
310 		err(-1, "juggling_thread: pthread_cond_signal");
312 	if (pthread_mutex_unlock(&threaded_mtx) < 0)
313 		err(-1, "juggling_thread: pthread_mutex_unlock");
	/* Echo loop: receive a message on fd2, send it straight back. */
315 	for (i = 0; i < NUMCYCLES; i++) {
316 		for (j = 0; j < threaded_pipeline; j++) {
317 			if (message_recv(fd2) < 0)
318 				err(-1, "message_recv fd2");
320 			if (message_send(fd2) < 0)
321 				err(-1, "message_send fd2");
/*
 * thread_juggle(fd1, fd2, pipeline): time NUMCYCLES message cycles with a
 * secondary thread echoing on fd2 (see juggling_thread()), so the result
 * includes IPC cost plus thread context-switch cost. The condition-variable
 * handshake keeps thread creation time out of the measured interval.
 * NOTE(review): as in juggling_thread(), pthread_* functions return 0 or a
 * positive error number (they do not return -1 or set errno), so every
 * `< 0` check here — including pthread_join — is dead code.
 * NOTE(review): threaded_child_ready is never reset to 0 between runs;
 * whether that matters depends on elided code. Return statement elided.
 */
328 static struct timespec
329 thread_juggle(int fd1, int fd2, int pipeline)
331 	struct timespec tstart, tfinish;
	/* Publish the depth before the child thread can read it. */
335 	threaded_pipeline = pipeline;
337 	if (pthread_mutex_init(&threaded_mtx, NULL) < 0)
338 		err(-1, "thread_juggle: pthread_mutex_init");
340 	if (pthread_create(&thread, NULL, juggling_thread, &fd2) < 0)
341 		err(-1, "thread_juggle: pthread_create");
	/* Wait (standard mutex/condvar pattern) until the child is ready. */
343 	if (pthread_mutex_lock(&threaded_mtx) < 0)
344 		err(-1, "thread_juggle: pthread_mutex_lock");
346 	while (!threaded_child_ready) {
347 		if (pthread_cond_wait(&threaded_cond, &threaded_mtx) < 0)
348 			err(-1, "thread_juggle: pthread_cond_wait");
351 	if (pthread_mutex_unlock(&threaded_mtx) < 0)
352 		err(-1, "thread_juggle: pthread_mutex_unlock");
354 	if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
355 		err(-1, "thread_juggle: clock_gettime");
	/* Send on fd1; the child receives and echoes on fd2; receive on fd1. */
357 	for (i = 0; i < NUMCYCLES; i++) {
358 		for (j = 0; j < pipeline; j++) {
359 			if (message_send(fd1) < 0)
360 				err(-1, "message_send fd1");
363 		for (j = 0; j < pipeline; j++) {
364 			if (message_recv(fd1) < 0)
365 				err(-1, "message_recv fd1");
369 	if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
370 		err(-1, "thread_juggle: clock_gettime");
372 	if (pthread_join(thread, NULL) < 0)
373 		err(-1, "thread_juggle: pthread_join");
	/* In-place: tfinish -= tstart. */
375 	timespecsub(&tfinish, &tstart);
381 * Juggle messages between two file descriptors in two processes, so measure
382 * the cost of IPC and the cost of a process context switch.
384 * Since we can't use a mutex between the processes, we simply do an extra
385 * write on the child to let the parent know that it's ready to start.
/*
 * process_juggle(fd1, fd2, pipeline): fork a child that echoes on fd2 and
 * time NUMCYCLES message cycles from the parent on fd1. The child's extra
 * initial message_send(fd2) is the cross-process "ready" handshake the
 * comment above describes; the parent consumes it before starting the clock.
 * NOTE(review): fork()'s return-value branches, several error-path bodies,
 * the waitpid result checks, and the return are elided in this view.
 */
387 static struct timespec
388 process_juggle(int fd1, int fd2, int pipeline)
390 	struct timespec tstart, tfinish;
391 	pid_t pid, ppid, wpid;
398 		err(-1, "process_juggle: fork");
	/* Child: announce readiness with one extra message on fd2. */
401 		if (message_send(fd2) < 0) {
405 			err(-1, "process_juggle: child: message_send");
	/* Child echo loop: send then receive on fd2 each pipeline slot. */
408 		for (i = 0; i < NUMCYCLES; i++) {
409 			for (j = 0; j < pipeline; j++) {
410 				if (message_send(fd2) < 0)
411 					err(-1, "message_send fd2");
413 				if (message_recv(fd2) < 0)
414 					err(-1, "message_recv fd2");
	/* Parent: consume the child's "ready" message before timing starts. */
420 		if (message_recv(fd1) < 0) {
424 			err(-1, "process_juggle: parent: message_recv");
427 		if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
428 			err(-1, "process_juggle: clock_gettime");
430 		for (i = 0; i < NUMCYCLES; i++) {
431 			for (j = 0; j < pipeline; j++) {
432 				if (message_send(fd1) < 0) {
436 					err(-1, "message_send fd1");
440 			for (j = 0; j < pipeline; j++) {
441 				if (message_recv(fd1) < 0) {
445 					err(-1, "message_recv fd1");
450 		if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
451 			err(-1, "process_juggle: clock_gettime");
	/* Reap the child; the checks on wpid live in elided lines. */
454 		wpid = waitpid(pid, NULL, 0);
456 			err(-1, "process_juggle: waitpid");
458 			errx(-1, "process_juggle: waitpid: pid != wpid");
460 		timespecsub(&tfinish, &tstart);
466 * When we print out results for larger pipeline sizes, we scale back by the
467 * depth of the pipeline. This generally means dividing by the pipeline
468 * depth. Except when it means dividing by zero.
/*
 * scale_timespec(ts, p): divide *ts in place by pipeline depth p; per the
 * comment above, p == 0 (the non-pipelined round) must be special-cased
 * rather than divided by. Body elided in this view.
 */
471 scale_timespec(struct timespec *ts, int p)
/*
 * Table of IPC primitives to benchmark: each entry pairs a constructor
 * returning a descriptor pair with the name used in the CSV output.
 */
481 static const struct ipctype {
482 	int (*it_create)(int *fd1p, int *fd2p);
	/* NOTE(review): the it_name member and the `} ipctypes[] = {` line
	 * are elided in this view. */
485 	{ pipe_create, "pipe" },
486 	{ udp_create, "udp" },
487 	{ socketpairdgram_create, "socketpairdgram" },
488 	{ socketpairstream_create, "socketpairstream" },
/* Element count of ipctypes[], for iteration in main(). */
490 static const int ipctypes_len = (sizeof(ipctypes) / sizeof(struct ipctype));
/*
 * main(): for every pipeline depth 0..PIPELINE_MAX and every IPC type, run
 * the single-thread, two-process, and two-thread juggle tests LOOPS times
 * each (after an uncounted warmup run) and emit the timings as CSV, with a
 * uname(3)/configuration preamble.
 * NOTE(review): the uname(3) call and its error check are elided in this
 * view; uts is used below without a visible declaration.
 */
493 main(int argc, char *argv[])
495 	struct timespec juggle_results[LOOPS], process_results[LOOPS];
496 	struct timespec thread_results[LOOPS];
497 	int fd1, fd2, i, j, p;
	/* Preamble: benchmark version, host identification, and parameters. */
500 	printf("version, juggle.c %s\n", "$FreeBSD$");
504 	printf("sysname, %s\n", uts.sysname);
505 	printf("nodename, %s\n", uts.nodename);
506 	printf("release, %s\n", uts.release);
507 	printf("version, %s\n", uts.version);
508 	printf("machine, %s\n", uts.machine);
511 	printf("MESSAGELEN, %d\n", MESSAGELEN);
512 	printf("NUMCYCLES, %d\n", NUMCYCLES);
513 	printf("LOOPS, %d\n", LOOPS);
514 	printf("PIPELINE_MAX, %d\n", PIPELINE_MAX);
	/* CSV header row: one data column per loop. */
517 	printf("ipctype, test, pipeline_depth");
518 	for (j = 0; j < LOOPS; j++)
519 		printf(", data%d", j);
	/* p == 0 is the unpipelined case; see scale_timespec()'s comment. */
522 	for (p = 0; p < PIPELINE_MAX + 1; p++) {
523 		for (i = 0; i < ipctypes_len; i++) {
524 			if (ipctypes[i].it_create(&fd1, &fd2) < 0)
525 				err(-1, "main: %s", ipctypes[i].it_name);
528 			 * For each test, do one uncounted warmup, then LOOPS
529 			 * runs of the actual test.
			/* NOTE(review): the warmup juggle() call is elided here. */
532 			for (j = 0; j < LOOPS; j++)
533 				juggle_results[j] = juggle(fd1, fd2, p);
534 			process_juggle(fd1, fd2, p);
535 			for (j = 0; j < LOOPS; j++)
536 				process_results[j] = process_juggle(fd1, fd2,
538 			thread_juggle(fd1, fd2, p);
539 			for (j = 0; j < LOOPS; j++)
540 				thread_results[j] = thread_juggle(fd1, fd2,
			/*
			 * NOTE(review): this zeroing would discard the thread
			 * results just collected; presumably it sits in the
			 * #else arm of an elided preprocessor conditional —
			 * confirm against the full file.
			 */
542 			for (j = 0; j < LOOPS; j++) {
543 				thread_results[j].tv_sec = 0;
544 				thread_results[j].tv_nsec = 0;
550 		 * When printing results for the round, normalize the results
551 		 * with respect to the pipeline depth. We're doing p times
552 		 * as much work, and are we taking p times as long?
554 		for (i = 0; i < ipctypes_len; i++) {
555 			printf("%s, juggle, %d, ", ipctypes[i].it_name, p);
556 			for (j = 0; j < LOOPS; j++) {
559 				scale_timespec(&juggle_results[j], p);
				/*
				 * NOTE(review): tv_sec is time_t, not
				 * unsigned int — "%u" is a format mismatch
				 * on LP64; "%jd" with an (intmax_t) cast
				 * would be correct. Same in both printf
				 * calls below.
				 */
560 				printf("%u.%09lu", juggle_results[j].tv_sec,
561 				    juggle_results[j].tv_nsec);
564 			printf("%s, process_juggle, %d, ",
565 			    ipctypes[i].it_name, p);
566 			for (j = 0; j < LOOPS; j++) {
569 				scale_timespec(&process_results[j], p);
570 				printf("%u.%09lu", process_results[j].tv_sec,
571 				    process_results[j].tv_nsec);
574 			printf("%s, thread_juggle, %d, ",
575 			    ipctypes[i].it_name, p);
576 			for (j = 0; j < LOOPS; j++) {
579 				scale_timespec(&thread_results[j], p);
580 				printf("%u.%09lu", thread_results[j].tv_sec,
581 				    thread_results[j].tv_nsec);