2 * Copyright (c) 2005 Robert N. M. Watson
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/types.h>
28 #include <sys/socket.h>
29 #include <sys/stdint.h>
31 #include <sys/utsname.h>
34 #include <netinet/in.h>
46 * juggle is a simple IPC/context switch performance test, which works on
47 * pairs of file descriptors of various types. In various runs, it considers
48 * the cost of bouncing a message synchronously across the descriptor pair,
49 * either in the same thread, two different threads, or two different
50 * processes. Timing measurements for each series of I/O's are reported, but
51 * the first measurement in each series discarded as "warmup" on the IPC
52 * primitive. Variations on the test permit for pipelining, or the insertion
53 * of more than one packet into the stream at a time, intended to permit
54 * greater parallelism, hopefully allowing performance numbers to reflect
55 * use of available parallelism, and/or intelligence in context switching to
56 * avoid premature switching when multiple messages are queued.
60 * The UDP test uses UDP over the loopback interface. Two arbitrary but
63 #define UDP_PORT1 2020
64 #define UDP_PORT2 2021
67 * Size of each message. Must be smaller than the socket buffer or pipe
68 * buffer maximum size, as we want to send it atomically without blocking.
69 * If pipelining is in use, must be able to fit PIPELINE_MAX of these
70 * messages into the send queue.
72 #define MESSAGELEN 128
75 * Number of message cycles -- into fd1, out of fd2, into fd2, and out of
76 * fd1. By counting in cycles, we allow the master thread or process to
77 * perform timing without explicitly synchronizing with the secondary thread
80 #define NUMCYCLES 1024
83 * Number of times to run each test.
88 * Number of in-flight messages per cycle. In adjusting this value, be
89 * careful not to exceed the socket/etc buffer depth, or messages may be lost
90 * or result in blocking.
92 #define PIPELINE_MAX 4
/*
 * Create a cross-connected pair of UDP sockets over loopback: sock1 binds
 * UDP_PORT1 and connects to UDP_PORT2, sock2 binds UDP_PORT2 and connects
 * to UDP_PORT1, so each end can use plain read(2)/write(2).
 * NOTE(review): socket/bind/connect failure branches and the returns that
 * store the fds into *fd1p/*fd2p are elided from this extract.
 */
95 udp_create(int *fd1p, int *fd2p)
97 struct sockaddr_in sin1, sin2;
/* Two datagram sockets; error checks presumably in elided lines. */
100 sock1 = socket(PF_INET, SOCK_DGRAM, 0);
104 sock2 = socket(PF_INET, SOCK_DGRAM, 0);
/* First endpoint: loopback address, port UDP_PORT1. */
110 bzero(&sin1, sizeof(sin1));
111 sin1.sin_len = sizeof(sin1);
112 sin1.sin_family = AF_INET;
113 sin1.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
114 sin1.sin_port = htons(UDP_PORT1);
/* Second endpoint: loopback address, port UDP_PORT2. */
116 bzero(&sin2, sizeof(sin2));
117 sin2.sin_len = sizeof(sin2);
118 sin2.sin_family = AF_INET;
119 sin2.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
120 sin2.sin_port = htons(UDP_PORT2);
/* Bind each socket to its own port, then connect it to the peer's. */
122 if (bind(sock1, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
128 if (bind(sock2, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
134 if (connect(sock1, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
140 if (connect(sock2, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
/*
 * Create an fd pair backed by a pipe, returned via *fd1p/*fd2p.
 * NOTE(review): the entire body is elided from this extract; only the
 * signature is visible.
 */
153 pipe_create(int *fd1p, int *fd2p)
/*
 * Create an fd pair backed by a local (UNIX-domain) datagram socketpair.
 * NOTE(review): error handling and the assignment into *fd1p/*fd2p are
 * elided from this extract.
 */
167 socketpairdgram_create(int *fd1p, int *fd2p)
171 if (socketpair(PF_LOCAL, SOCK_DGRAM, 0, fds) < 0)
/*
 * Create an fd pair backed by a local (UNIX-domain) stream socketpair.
 * NOTE(review): error handling and the assignment into *fd1p/*fd2p are
 * elided from this extract.
 */
181 socketpairstream_create(int *fd1p, int *fd2p)
185 if (socketpair(PF_LOCAL, SOCK_STREAM, 0, fds) < 0)
/*
 * Send one fixed-size (MESSAGELEN) zeroed message on descriptor s with a
 * single write(2); a short or failed write is treated as an error.
 * NOTE(review): the function signature, the declaration of len/s, and the
 * error-path/return lines are elided from this extract.
 */
197 u_char buffer[MESSAGELEN];
200 bzero(buffer, sizeof(buffer));
202 len = write(s, buffer, sizeof(buffer));
/* Short writes are not retried -- MESSAGELEN must fit atomically. */
205 if (len != sizeof(buffer)) {
/*
 * Receive one fixed-size (MESSAGELEN) message from descriptor s with a
 * single read(2); a short or failed read is treated as an error.
 * NOTE(review): the function signature, the declaration of len/s, and the
 * error-path/return lines are elided from this extract.
 */
215 u_char buffer[MESSAGELEN];
218 len = read(s, buffer, sizeof(buffer));
/* A partial read means the message was not delivered atomically. */
221 if (len != sizeof(buffer)) {
229 * Juggle messages between two file descriptors in a single thread/process,
230 * so simply a measure of IPC performance.
/*
 * Returns the elapsed wall-clock time for NUMCYCLES round trips, with
 * `pipeline` messages in flight per cycle.
 * NOTE(review): the opening brace, loop-variable declarations, inner
 * closing braces, and the final return of the timespec are elided from
 * this extract.
 */
232 static struct timespec
233 juggle(int fd1, int fd2, int pipeline)
235 struct timespec tstart, tfinish;
238 if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
239 err(-1, "juggle: clock_gettime");
241 for (i = 0; i < NUMCYCLES; i++) {
/* Fill the pipeline into fd1 ... */
243 for (j = 0; j < pipeline; j++) {
244 if (message_send(fd1) < 0)
245 err(-1, "message_send fd1");
/* ... drain from fd2 and echo each message back ... */
248 for (j = 0; j < pipeline; j++) {
249 if (message_recv(fd2) < 0)
250 err(-1, "message_recv fd2");
252 if (message_send(fd2) < 0)
253 err(-1, "message_send fd2");
/* ... then collect the echoes from fd1, completing the cycle. */
256 for (j = 0; j < pipeline; j++) {
257 if (message_recv(fd1) < 0)
258 err(-1, "message_recv fd1");
262 if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
263 err(-1, "juggle: clock_gettime");
/* timespecsub(a, b, res): res = a - b, i.e. tfinish becomes the delta. */
265 timespecsub(&tfinish, &tstart, &tfinish);
271 * Juggle messages between two file descriptors in two threads, so measure
272 * the cost of IPC and the cost of a thread context switch.
274 * In order to avoid measuring thread creation time, we make use of a
275 * condition variable to decide when both threads are ready to begin
/* Set by the child thread (under threaded_mtx) once it is ready to run. */
278 static int threaded_child_ready;
/* Mutex/condvar pair guarding the ready handshake above. */
279 static pthread_mutex_t threaded_mtx;
280 static pthread_cond_t threaded_cond;
/* Pipeline depth for the current threaded run, shared with the child. */
281 static int threaded_pipeline;
/*
 * Secondary thread for thread_juggle(): signals readiness through the
 * threaded_mtx/threaded_cond handshake, then echoes NUMCYCLES pipelined
 * message batches on fd2.
 * NOTE(review): the return type, the extraction of fd2 from arg,
 * loop-variable declarations, and the return statement are elided from
 * this extract.
 */
284 juggling_thread(void *arg)
290 if (pthread_mutex_lock(&threaded_mtx) != 0)
291 err(-1, "juggling_thread: pthread_mutex_lock");
/* Tell the parent we are up; it waits on threaded_cond for this flag. */
293 threaded_child_ready = 1;
295 if (pthread_cond_signal(&threaded_cond) != 0)
296 err(-1, "juggling_thread: pthread_cond_signal");
298 if (pthread_mutex_unlock(&threaded_mtx) != 0)
299 err(-1, "juggling_thread: pthread_mutex_unlock");
/* Echo loop: receive then send back each in-flight message on fd2. */
301 for (i = 0; i < NUMCYCLES; i++) {
302 for (j = 0; j < threaded_pipeline; j++) {
303 if (message_recv(fd2) < 0)
304 err(-1, "message_recv fd2");
306 if (message_send(fd2) < 0)
307 err(-1, "message_send fd2");
/*
 * Juggle messages between two descriptors split across two threads, so the
 * measurement includes thread context-switch cost.  Spawns
 * juggling_thread() on fd2, waits for its readiness handshake, then times
 * NUMCYCLES pipelined send/recv batches on fd1.
 * NOTE(review): variable declarations, pthread_cond_init, closing braces,
 * and the final return of the timespec are elided from this extract.
 */
314 static struct timespec
315 thread_juggle(int fd1, int fd2, int pipeline)
317 struct timespec tstart, tfinish;
/* Publish the pipeline depth for the child before it starts. */
321 threaded_pipeline = pipeline;
323 if (pthread_mutex_init(&threaded_mtx, NULL) != 0)
324 err(-1, "thread_juggle: pthread_mutex_init");
326 if (pthread_create(&thread, NULL, juggling_thread, &fd2) != 0)
327 err(-1, "thread_juggle: pthread_create");
/* Block until the child signals readiness, so thread startup cost is
 * excluded from the timed region below. */
329 if (pthread_mutex_lock(&threaded_mtx) != 0)
330 err(-1, "thread_juggle: pthread_mutex_lock");
332 while (!threaded_child_ready) {
333 if (pthread_cond_wait(&threaded_cond, &threaded_mtx) != 0)
334 err(-1, "thread_juggle: pthread_cond_wait");
337 if (pthread_mutex_unlock(&threaded_mtx) != 0)
338 err(-1, "thread_juggle: pthread_mutex_unlock");
340 if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
341 err(-1, "thread_juggle: clock_gettime");
/* Timed region: pump pipelined batches through the child thread. */
343 for (i = 0; i < NUMCYCLES; i++) {
344 for (j = 0; j < pipeline; j++) {
345 if (message_send(fd1) < 0)
346 err(-1, "message_send fd1");
349 for (j = 0; j < pipeline; j++) {
350 if (message_recv(fd1) < 0)
351 err(-1, "message_recv fd1");
355 if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
356 err(-1, "thread_juggle: clock_gettime");
358 if (pthread_join(thread, NULL) != 0)
359 err(-1, "thread_juggle: pthread_join");
/* tfinish becomes the elapsed delta (tfinish - tstart). */
361 timespecsub(&tfinish, &tstart, &tfinish);
367 * Juggle messages between two file descriptors in two processes, so measure
368 * the cost of IPC and the cost of a process context switch.
370 * Since we can't use a mutex between the processes, we simply do an extra
371 * write on the child to let the parent know that it's ready to start.
/*
 * NOTE(review): the fork()/pid test structure, child exit path, several
 * error-branch bodies, and the final return of the timespec are elided
 * from this extract.
 */
373 static struct timespec
374 process_juggle(int fd1, int fd2, int pipeline)
376 struct timespec tstart, tfinish;
377 pid_t pid, ppid, wpid;
384 err(-1, "process_juggle: fork");
/* Child: one extra send acts as the "ready" signal to the parent. */
387 if (message_send(fd2) < 0) {
391 err(-1, "process_juggle: child: message_send");
/* Child echo loop on fd2. */
394 for (i = 0; i < NUMCYCLES; i++) {
395 for (j = 0; j < pipeline; j++) {
396 if (message_send(fd2) < 0)
397 err(-1, "message_send fd2");
399 if (message_recv(fd2) < 0)
400 err(-1, "message_recv fd2");
/* Parent: consume the child's ready message before starting the clock. */
406 if (message_recv(fd1) < 0) {
410 err(-1, "process_juggle: parent: message_recv");
413 if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
414 err(-1, "process_juggle: clock_gettime");
/* Timed region: pipelined batches bounced through the child process. */
416 for (i = 0; i < NUMCYCLES; i++) {
417 for (j = 0; j < pipeline; j++) {
418 if (message_send(fd1) < 0) {
422 err(-1, "message_send fd1");
426 for (j = 0; j < pipeline; j++) {
427 if (message_recv(fd1) < 0) {
431 err(-1, "message_recv fd1");
436 if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
437 err(-1, "process_juggle: clock_gettime");
/* Reap the child and sanity-check the pid before reporting the delta. */
440 wpid = waitpid(pid, NULL, 0);
442 err(-1, "process_juggle: waitpid");
444 errx(-1, "process_juggle: waitpid: pid != wpid");
446 timespecsub(&tfinish, &tstart, &tfinish);
452 * When we print out results for larger pipeline sizes, we scale back by the
453 * depth of the pipeline. This generally means dividing by the pipeline
454 * depth. Except when it means dividing by zero.
/*
 * Normalize *ts in place by pipeline depth p.
 * NOTE(review): the entire body (including the presumable p == 0 guard
 * alluded to above) is elided from this extract; only the signature is
 * visible.
 */
457 scale_timespec(struct timespec *ts, int p)
/*
 * Table of IPC mechanisms under test: each entry pairs a constructor that
 * produces a connected fd pair with the name printed in the results.
 * NOTE(review): the it_name field declaration and the closing of the
 * array initializer are elided from this extract.
 */
467 static const struct ipctype {
468 int (*it_create)(int *fd1p, int *fd2p);
471 { pipe_create, "pipe" },
472 { udp_create, "udp" },
473 { socketpairdgram_create, "socketpairdgram" },
474 { socketpairstream_create, "socketpairstream" },
/* Element count of ipctypes, used as the loop bound in main(). */
476 static const int ipctypes_len = (sizeof(ipctypes) / sizeof(struct ipctype));
/*
 * Driver: print a header (version, uname fields, benchmark constants, CSV
 * column names), then for each pipeline depth 0..PIPELINE_MAX and each IPC
 * type, run one warmup plus LOOPS timed iterations of the single-threaded,
 * two-process, and two-thread juggles, and print the per-loop timings
 * normalized by pipeline depth.
 * NOTE(review): the return type, the uname(3) call filling `uts`, several
 * closing braces, printf format strings for the timespec fields, and the
 * final return are elided from this extract.
 */
479 main(int argc, char *argv[])
481 struct timespec juggle_results[LOOPS], process_results[LOOPS];
482 struct timespec thread_results[LOOPS];
483 int fd1, fd2, i, j, p;
486 printf("version, juggle.c %s\n", "$FreeBSD$");
/* uts is presumably filled by uname(3) in elided lines. */
490 printf("sysname, %s\n", uts.sysname);
491 printf("nodename, %s\n", uts.nodename);
492 printf("release, %s\n", uts.release);
493 printf("version, %s\n", uts.version);
494 printf("machine, %s\n", uts.machine);
497 printf("MESSAGELEN, %d\n", MESSAGELEN);
498 printf("NUMCYCLES, %d\n", NUMCYCLES);
499 printf("LOOPS, %d\n", LOOPS);
500 printf("PIPELINE_MAX, %d\n", PIPELINE_MAX);
/* CSV header row: one data column per timed loop. */
503 printf("ipctype, test, pipeline_depth");
504 for (j = 0; j < LOOPS; j++)
505 printf(", data%d", j);
508 for (p = 0; p < PIPELINE_MAX + 1; p++) {
509 for (i = 0; i < ipctypes_len; i++) {
510 if (ipctypes[i].it_create(&fd1, &fd2) < 0)
511 err(-1, "main: %s", ipctypes[i].it_name);
514 * For each test, do one uncounted warmup, then LOOPS
515 * runs of the actual test.
518 for (j = 0; j < LOOPS; j++)
519 juggle_results[j] = juggle(fd1, fd2, p);
/* Warmup call, result discarded; then the timed LOOPS runs. */
520 process_juggle(fd1, fd2, p);
521 for (j = 0; j < LOOPS; j++)
522 process_results[j] = process_juggle(fd1, fd2,
524 thread_juggle(fd1, fd2, p);
525 for (j = 0; j < LOOPS; j++)
526 thread_results[j] = thread_juggle(fd1, fd2,
/* NOTE(review): thread results are zeroed here in the visible code,
 * overwriting the measurements taken above -- the surrounding (elided)
 * context presumably makes this conditional; confirm against the full
 * source. */
528 for (j = 0; j < LOOPS; j++) {
529 thread_results[j].tv_sec = 0;
530 thread_results[j].tv_nsec = 0;
536 * When printing results for the round, normalize the results
537 * with respect to the pipeline depth. We're doing p times
538 * as much work, and are we taking p times as long?
540 for (i = 0; i < ipctypes_len; i++) {
541 printf("%s, juggle, %d, ", ipctypes[i].it_name, p);
542 for (j = 0; j < LOOPS; j++) {
545 scale_timespec(&juggle_results[j], p);
547 (intmax_t)juggle_results[j].tv_sec,
548 juggle_results[j].tv_nsec);
551 printf("%s, process_juggle, %d, ",
552 ipctypes[i].it_name, p);
553 for (j = 0; j < LOOPS; j++) {
556 scale_timespec(&process_results[j], p);
558 (intmax_t)process_results[j].tv_sec,
559 process_results[j].tv_nsec);
562 printf("%s, thread_juggle, %d, ",
563 ipctypes[i].it_name, p);
564 for (j = 0; j < LOOPS; j++) {
567 scale_timespec(&thread_results[j], p);
569 (intmax_t)thread_results[j].tv_sec,
570 thread_results[j].tv_nsec);