2 * Copyright (c) 2005 Robert N. M. Watson
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/types.h>
30 #include <sys/socket.h>
31 #include <sys/stdint.h>
33 #include <sys/utsname.h>
36 #include <netinet/in.h>
48 * juggle is a simple IPC/context switch performance test, which works on
49 * pairs of file descriptors of various types. In various runs, it considers
50 * the cost of bouncing a message synchronously across the descriptor pair,
51 * either in the same thread, two different threads, or two different
52 * processes. Timing measurements for each series of I/O's are reported, but
53 * the first measurement in each series discarded as "warmup" on the IPC
54 * primitive. Variations on the test permit for pipelining, or the insertion
55 * of more than one packet into the stream at a time, intended to permit
56 * greater parallelism, hopefully allowing performance numbers to reflect
57 * use of available parallelism, and/or intelligence in context switching to
58 * avoid premature switching when multiple messages are queued.
62 * The UDP test uses UDP over the loopback interface. Two arbitrary but
65 #define UDP_PORT1 2020
66 #define UDP_PORT2 2021
69 * Size of each message. Must be smaller than the socket buffer or pipe
70 * buffer maximum size, as we want to send it atomically without blocking.
71 * If pipelining is in use, must be able to fit PIPELINE_MAX of these
72 * messages into the send queue.
74 #define MESSAGELEN 128
77 * Number of message cycles -- into fd1, out of fd2, into fd2, and out of
78 * fd1. By counting in cycles, we allow the master thread or process to
79 * perform timing without explicitly synchronizing with the secondary thread
82 #define NUMCYCLES 1024
85 * Number of times to run each test.
90 * Number of in-flight messages per cycle. If adjusting this value, be
91 * careful not to exceed the socket/etc buffer depth, or messages may be lost
92 * or result in blocking.
94 #define PIPELINE_MAX 4
97 * As in all programs, steal timespecsub() from time.h.
/*
 * timespecsub(vvp, uvp): in-place *vvp -= *uvp, borrowing one second from
 * tv_sec when tv_nsec underflows so the result stays normalized.
 * NOTE(review): the customary do { ... } while (0) wrapper and closing
 * brace are not visible in this excerpt -- confirm against the full source.
 */
99 #define timespecsub(vvp, uvp) \
101 (vvp)->tv_sec -= (uvp)->tv_sec; \
102 (vvp)->tv_nsec -= (uvp)->tv_nsec; \
103 if ((vvp)->tv_nsec < 0) { \
105 (vvp)->tv_nsec += 1000000000; \
/*
 * udp_create(): build a cross-connected pair of loopback UDP sockets.
 * sock1 binds UDP_PORT1 and connects to UDP_PORT2; sock2 does the reverse,
 * so a write on either descriptor is readable on the other.
 * NOTE(review): the socket()/bind()/connect() failure branches, the stores
 * through *fd1p/*fd2p, and the return are elided in this excerpt -- confirm
 * against the full source.
 */
110 udp_create(int *fd1p, int *fd2p)
112 struct sockaddr_in sin1, sin2;
115 sock1 = socket(PF_INET, SOCK_DGRAM, 0);
119 sock2 = socket(PF_INET, SOCK_DGRAM, 0);
/* Endpoint 1: loopback address, fixed port UDP_PORT1. */
125 bzero(&sin1, sizeof(sin1));
126 sin1.sin_len = sizeof(sin1);
127 sin1.sin_family = AF_INET;
128 sin1.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
129 sin1.sin_port = htons(UDP_PORT1);
/* Endpoint 2: loopback address, fixed port UDP_PORT2. */
131 bzero(&sin2, sizeof(sin2));
132 sin2.sin_len = sizeof(sin2);
133 sin2.sin_family = AF_INET;
134 sin2.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
135 sin2.sin_port = htons(UDP_PORT2);
/*
 * Bind each socket to its own port, then connect each to the other's
 * address so plain read()/write() can be used for the benchmark.
 * NOTE(review): fixed ports mean a second concurrent instance of this
 * tool will fail to bind.
 */
137 if (bind(sock1, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
143 if (bind(sock2, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
149 if (connect(sock1, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
155 if (connect(sock2, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
/*
 * pipe_create(): produce a descriptor pair backed by pipe(2).
 * NOTE(review): the entire body is elided in this excerpt; presumably it
 * calls pipe() and stores the two ends through *fd1p/*fd2p -- confirm
 * against the full source.  (A classic pipe is unidirectional, so the
 * "juggle" traffic pattern here deserves verification too.)
 */
168 pipe_create(int *fd1p, int *fd2p)
/*
 * socketpairdgram_create(): descriptor pair from a local-domain datagram
 * socketpair.  NOTE(review): the error path and the stores through
 * *fd1p/*fd2p are elided in this excerpt -- confirm against full source.
 */
182 socketpairdgram_create(int *fd1p, int *fd2p)
186 if (socketpair(PF_LOCAL, SOCK_DGRAM, 0, fds) < 0)
/*
 * socketpairstream_create(): descriptor pair from a local-domain stream
 * socketpair.  NOTE(review): the error path and the stores through
 * *fd1p/*fd2p are elided in this excerpt -- confirm against full source.
 */
196 socketpairstream_create(int *fd1p, int *fd2p)
200 if (socketpair(PF_LOCAL, SOCK_STREAM, 0, fds) < 0)
/*
 * message_send(s): write one zero-filled MESSAGELEN-byte message to
 * descriptor s.  A short write is treated as an error -- MESSAGELEN is
 * sized (see its definition) so the write should always be atomic.
 * NOTE(review): the function signature and the error-return statements
 * are elided in this excerpt -- confirm against the full source.
 */
212 u_char buffer[MESSAGELEN];
215 bzero(buffer, sizeof(buffer));
217 len = write(s, buffer, sizeof(buffer));
220 if (len != sizeof(buffer)) {
/*
 * message_recv(s): read one MESSAGELEN-byte message from descriptor s,
 * treating a short read as an error.
 * NOTE(review): on the SOCK_STREAM variants read(2) may legally return
 * fewer bytes than requested even when all data is in flight; whether the
 * elided error path loops or aborts should be confirmed against the full
 * source.
 */
230 u_char buffer[MESSAGELEN];
233 len = read(s, buffer, sizeof(buffer));
236 if (len != sizeof(buffer)) {
244 * Juggle messages between two file descriptors in a single thread/process,
245 * so simply a measure of IPC performance.
/*
 * juggle(fd1, fd2, pipeline): run NUMCYCLES round trips single-threaded --
 * send `pipeline` messages into fd1, bounce each back via fd2, drain fd1 --
 * and return the elapsed time as a struct timespec.
 * NOTE(review): the local i/j declarations, loop closing braces, and the
 * `return (tfinish);` are elided in this excerpt.  Also, CLOCK_REALTIME is
 * subject to wall-clock steps; CLOCK_MONOTONIC would be a safer interval
 * timer -- worth confirming upstream intent.
 */
247 static struct timespec
248 juggle(int fd1, int fd2, int pipeline)
250 struct timespec tstart, tfinish;
253 if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
254 err(-1, "juggle: clock_gettime");
256 for (i = 0; i < NUMCYCLES; i++) {
/* Inject `pipeline` messages into fd1... */
258 for (j = 0; j < pipeline; j++) {
259 if (message_send(fd1) < 0)
260 err(-1, "message_send fd1");
/* ...bounce each one out of fd2 and straight back in... */
263 for (j = 0; j < pipeline; j++) {
264 if (message_recv(fd2) < 0)
265 err(-1, "message_recv fd2");
267 if (message_send(fd2) < 0)
268 err(-1, "message_send fd2");
/* ...and drain them from fd1 to complete the cycle. */
271 for (j = 0; j < pipeline; j++) {
272 if (message_recv(fd1) < 0)
273 err(-1, "message_recv fd1");
277 if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
278 err(-1, "juggle: clock_gettime");
/* Leave the elapsed interval in tfinish (tfinish -= tstart). */
280 timespecsub(&tfinish, &tstart);
286 * Juggle messages between two file descriptors in two threads, so measure
287 * the cost of IPC and the cost of a thread context switch.
289 * In order to avoid measuring thread creation time, we make use of a
290 * condition variable to decide when both threads are ready to begin
/*
 * Shared rendezvous state for the threaded test: the child sets
 * threaded_child_ready under threaded_mtx and signals threaded_cond so the
 * parent starts timing only after thread creation overhead has passed.
 * threaded_pipeline passes the pipeline depth to the child, which takes no
 * usable argument for it.
 */
293 static int threaded_child_ready;
294 static pthread_mutex_t threaded_mtx;
295 static pthread_cond_t threaded_cond;
296 static int threaded_pipeline;
/*
 * juggling_thread(arg): child side of the threaded test.  arg carries the
 * fd to bounce on (passed as &fd2 by thread_juggle).  Announces readiness
 * via the condition variable, then echoes NUMCYCLES * pipeline messages.
 * NOTE(review): pthread_mutex_lock/pthread_cond_signal/pthread_mutex_unlock
 * return positive error numbers, never -1, so these `< 0` checks can never
 * fire -- and err(3) would print a stale errno anyway.  The correct idiom
 * is `if ((error = pthread_...()) != 0) errc(...)`.
 */
299 juggling_thread(void *arg)
305 if (pthread_mutex_lock(&threaded_mtx) < 0)
306 err(-1, "juggling_thread: pthread_mutex_lock");
/* Tell the parent we are running so it can start the clock. */
308 threaded_child_ready = 1;
310 if (pthread_cond_signal(&threaded_cond) < 0)
311 err(-1, "juggling_thread: pthread_cond_signal");
313 if (pthread_mutex_unlock(&threaded_mtx) < 0)
314 err(-1, "juggling_thread: pthread_mutex_unlock");
/* Echo loop: receive a batch on fd2, send each message straight back. */
316 for (i = 0; i < NUMCYCLES; i++) {
317 for (j = 0; j < threaded_pipeline; j++) {
318 if (message_recv(fd2) < 0)
319 err(-1, "message_recv fd2");
321 if (message_send(fd2) < 0)
322 err(-1, "message_send fd2");
/*
 * thread_juggle(fd1, fd2, pipeline): measure IPC plus thread context-switch
 * cost.  Spawns juggling_thread() as the echo side, waits on the condition
 * variable until it is ready (so thread-creation time is excluded), then
 * times NUMCYCLES send/recv batches on fd1 and joins the child.
 * NOTE(review): as in juggling_thread, pthread_* functions return error
 * numbers, not -1, so every `< 0` check here is dead code.  Local i/j
 * declarations and the final `return (tfinish);` are elided in this excerpt.
 */
329 static struct timespec
330 thread_juggle(int fd1, int fd2, int pipeline)
332 struct timespec tstart, tfinish;
/* Publish the depth before the child can read it. */
336 threaded_pipeline = pipeline;
338 if (pthread_mutex_init(&threaded_mtx, NULL) < 0)
339 err(-1, "thread_juggle: pthread_mutex_init");
341 if (pthread_create(&thread, NULL, juggling_thread, &fd2) < 0)
342 err(-1, "thread_juggle: pthread_create");
/* Wait for the child's ready signal; cond_wait may wake spuriously,
 * hence the while loop re-checking the predicate. */
344 if (pthread_mutex_lock(&threaded_mtx) < 0)
345 err(-1, "thread_juggle: pthread_mutex_lock");
347 while (!threaded_child_ready) {
348 if (pthread_cond_wait(&threaded_cond, &threaded_mtx) < 0)
349 err(-1, "thread_juggle: pthread_cond_wait");
352 if (pthread_mutex_unlock(&threaded_mtx) < 0)
353 err(-1, "thread_juggle: pthread_mutex_unlock");
355 if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
356 err(-1, "thread_juggle: clock_gettime");
/* Timed section: push a batch into fd1; the child echoes it back. */
358 for (i = 0; i < NUMCYCLES; i++) {
359 for (j = 0; j < pipeline; j++) {
360 if (message_send(fd1) < 0)
361 err(-1, "message_send fd1");
364 for (j = 0; j < pipeline; j++) {
365 if (message_recv(fd1) < 0)
366 err(-1, "message_recv fd1");
370 if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
371 err(-1, "thread_juggle: clock_gettime");
373 if (pthread_join(thread, NULL) < 0)
374 err(-1, "thread_juggle: pthread_join");
376 timespecsub(&tfinish, &tstart);
382 * Juggle messages between two file descriptors in two processes, so measure
383 * the cost of IPC and the cost of a process context switch.
385 * Since we can't use a mutex between the processes, we simply do an extra
386 * write on the child to let the parent know that it's ready to start.
/*
 * process_juggle(fd1, fd2, pipeline): fork an echo child, wait for its
 * one-message "ready" handshake on fd1, then time NUMCYCLES batches.
 * NOTE(review): the fork() call itself, the child's exit, the error-branch
 * bodies (which appear to kill the peer before err()), and the final
 * `return (tfinish);` are elided in this excerpt -- confirm against the
 * full source.
 */
388 static struct timespec
389 process_juggle(int fd1, int fd2, int pipeline)
391 struct timespec tstart, tfinish;
392 pid_t pid, ppid, wpid;
399 err(-1, "process_juggle: fork");
/* Child: send one extra message as the ready handshake... */
402 if (message_send(fd2) < 0) {
406 err(-1, "process_juggle: child: message_send");
/* ...then echo batches for the duration of the test. */
409 for (i = 0; i < NUMCYCLES; i++) {
410 for (j = 0; j < pipeline; j++) {
411 if (message_send(fd2) < 0)
412 err(-1, "message_send fd2");
414 if (message_recv(fd2) < 0)
415 err(-1, "message_recv fd2");
/* Parent: consume the handshake message before starting the clock. */
421 if (message_recv(fd1) < 0) {
425 err(-1, "process_juggle: parent: message_recv");
428 if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
429 err(-1, "process_juggle: clock_gettime");
431 for (i = 0; i < NUMCYCLES; i++) {
432 for (j = 0; j < pipeline; j++) {
433 if (message_send(fd1) < 0) {
437 err(-1, "message_send fd1");
441 for (j = 0; j < pipeline; j++) {
442 if (message_recv(fd1) < 0) {
446 err(-1, "message_recv fd1");
451 if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
452 err(-1, "process_juggle: clock_gettime");
/* Reap the child; a mismatched wpid indicates a waitpid logic error. */
455 wpid = waitpid(pid, NULL, 0);
457 err(-1, "process_juggle: waitpid");
459 errx(-1, "process_juggle: waitpid: pid != wpid");
461 timespecsub(&tfinish, &tstart);
467 * When we print out results for larger pipeline sizes, we scale back by the
468 * depth of the pipeline. This generally means dividing by the pipeline
469 * depth. Except when it means dividing by zero.
/*
 * scale_timespec(ts, p): normalize a measured interval by pipeline depth p
 * in place.  NOTE(review): the body is elided in this excerpt; per the
 * comment above it presumably divides ts by p while special-casing p == 0
 * (the depth loop in main() starts at 0) -- confirm against full source.
 */
472 scale_timespec(struct timespec *ts, int p)
/*
 * Table of IPC mechanisms under test: each entry pairs a descriptor-pair
 * constructor with the label used in the CSV-style output.  main() iterates
 * over all ipctypes_len entries for every pipeline depth.
 */
482 static const struct ipctype {
483 int (*it_create)(int *fd1p, int *fd2p);
486 { pipe_create, "pipe" },
487 { udp_create, "udp" },
488 { socketpairdgram_create, "socketpairdgram" },
489 { socketpairstream_create, "socketpairstream" },
491 static const int ipctypes_len = (sizeof(ipctypes) / sizeof(struct ipctype));
/*
 * main(): benchmark driver.  Prints a header identifying the build, host
 * (via uname), and tunable constants, then for every pipeline depth
 * 0..PIPELINE_MAX and every IPC type runs the single-threaded, two-process,
 * and two-thread juggle tests (one uncounted warmup each, then LOOPS timed
 * runs) and emits one comma-separated result row per test.
 * NOTE(review): the uname() call, descriptor close(), printf format
 * strings for the timing values, and closing braces are elided in this
 * excerpt -- confirm against the full source.
 */
494 main(int argc, char *argv[])
496 struct timespec juggle_results[LOOPS], process_results[LOOPS];
497 struct timespec thread_results[LOOPS];
498 int fd1, fd2, i, j, p;
501 printf("version, juggle.c %s\n", "$FreeBSD$");
/* Host identification block, from uname(3). */
505 printf("sysname, %s\n", uts.sysname);
506 printf("nodename, %s\n", uts.nodename);
507 printf("release, %s\n", uts.release);
508 printf("version, %s\n", uts.version);
509 printf("machine, %s\n", uts.machine);
/* Record the tunables so result files are self-describing. */
512 printf("MESSAGELEN, %d\n", MESSAGELEN);
513 printf("NUMCYCLES, %d\n", NUMCYCLES);
514 printf("LOOPS, %d\n", LOOPS);
515 printf("PIPELINE_MAX, %d\n", PIPELINE_MAX);
/* CSV header row: one data column per timed loop. */
518 printf("ipctype, test, pipeline_depth");
519 for (j = 0; j < LOOPS; j++)
520 printf(", data%d", j);
/* Outer sweep over pipeline depth (note: starts at 0), inner over type. */
523 for (p = 0; p < PIPELINE_MAX + 1; p++) {
524 for (i = 0; i < ipctypes_len; i++) {
525 if (ipctypes[i].it_create(&fd1, &fd2) < 0)
526 err(-1, "main: %s", ipctypes[i].it_name);
529 * For each test, do one uncounted warmup, then LOOPS
530 * runs of the actual test.
533 for (j = 0; j < LOOPS; j++)
534 juggle_results[j] = juggle(fd1, fd2, p);
/* Bare call = warmup run whose result is discarded. */
535 process_juggle(fd1, fd2, p);
536 for (j = 0; j < LOOPS; j++)
537 process_results[j] = process_juggle(fd1, fd2,
539 thread_juggle(fd1, fd2, p);
540 for (j = 0; j < LOOPS; j++)
541 thread_results[j] = thread_juggle(fd1, fd2,
/* NOTE(review): this zeroing loop would clobber the results just
 * gathered above; it is presumably the #else arm of a conditional
 * (threads unavailable) whose preprocessor lines are elided here --
 * confirm against the full source. */
543 for (j = 0; j < LOOPS; j++) {
544 thread_results[j].tv_sec = 0;
545 thread_results[j].tv_nsec = 0;
551 * When printing results for the round, normalize the results
552 * with respect to the pipeline depth. We're doing p times
553 * as much work, and are we taking p times as long?
555 for (i = 0; i < ipctypes_len; i++) {
556 printf("%s, juggle, %d, ", ipctypes[i].it_name, p);
557 for (j = 0; j < LOOPS; j++) {
560 scale_timespec(&juggle_results[j], p);
562 (intmax_t)juggle_results[j].tv_sec,
563 juggle_results[j].tv_nsec);
566 printf("%s, process_juggle, %d, ",
567 ipctypes[i].it_name, p);
568 for (j = 0; j < LOOPS; j++) {
571 scale_timespec(&process_results[j], p);
573 (intmax_t)process_results[j].tv_sec,
574 process_results[j].tv_nsec);
577 printf("%s, thread_juggle, %d, ",
578 ipctypes[i].it_name, p);
579 for (j = 0; j < LOOPS; j++) {
582 scale_timespec(&thread_results[j], p);
584 (intmax_t)thread_results[j].tv_sec,
585 thread_results[j].tv_nsec);