3 """Simple harness that benchmarks different variants of the routines,
4 caches the results, and emits all of the records at the end.
6 Results are generated for different values of:
18 # Prefix to the executables
19 build = '../build/try-'
# Space-separated names of every routine the harness knows how to benchmark.
21 ALL = 'memchr memcmp memcpy memset strchr strcmp strcpy strlen'
# Map of library variant name -> space-separated routines that variant
# provides; only (variant, routine) pairs listed here are benchmarked.
# NOTE(review): the "HAS = {" opener and some entries fall outside this
# excerpt -- confirm against the full file.
24 'this': 'bounce memchr memcpy memset strchr strcmp strcpy strlen',
25 'bionic-a9': 'memcmp memcpy memset strcmp strcpy strlen',
26 'bionic-a15': 'memcmp memcpy memset strcmp strcpy strlen',
28 'csl': 'memcpy memset',
29 'glibc': 'memcpy memset strchr strlen',
31 'newlib': 'memcpy strcmp strcpy strlen',
33 'newlib-xscale': 'memchr memcpy memset strchr strcmp strcpy strlen',
34 'plain': 'memset memcpy strcmp strcpy',
# Alignment sets passed to the benchmark binary via -a: single values for
# one-buffer routines, "src:dst" pairs for two-buffer routines.
37 BOUNCE_ALIGNMENTS = ['1']
38 SINGLE_BUFFER_ALIGNMENTS = ['1', '2', '4', '8', '16', '32']
39 DUAL_BUFFER_ALIGNMENTS = ['1:32', '2:32', '4:32', '8:32', '16:32', '32:32']
# Map of routine name -> list of alignments to exercise for that routine.
# NOTE(review): the "ALIGNMENTS = {" opener is outside this excerpt.
42 'bounce': BOUNCE_ALIGNMENTS,
43 'memchr': SINGLE_BUFFER_ALIGNMENTS,
44 'memset': SINGLE_BUFFER_ALIGNMENTS,
45 'strchr': SINGLE_BUFFER_ALIGNMENTS,
46 'strlen': SINGLE_BUFFER_ALIGNMENTS,
47 'memcmp': DUAL_BUFFER_ALIGNMENTS,
48 'memcpy': DUAL_BUFFER_ALIGNMENTS,
49 'strcmp': DUAL_BUFFER_ALIGNMENTS,
50 'strcpy': DUAL_BUFFER_ALIGNMENTS,
# Sorted, stable orderings derived from the maps above; used below as the
# argparse defaults/choices for --variants and --functions.
53 VARIANTS = sorted(HAS.keys())
54 FUNCTIONS = sorted(ALIGNMENTS.keys())
58 def run(cache, variant, function, bytes, loops, alignment, run_id, quiet=False):
59 """Perform a single run, exercising the cache as appropriate."""
# Cache key: one entry per (variant, function, size, loops, alignment, run).
60 key = ':'.join('%s' % x for x in (variant, function, bytes, loops, alignment, run_id))
# Build the command line from locals(): the variant executable (module-level
# 'build' prefix + variant name) plus the test parameters.
# NOTE(review): the lines between the key construction and here are missing
# from this excerpt -- presumably the cache-hit branch and the binding of
# 'xbuild'; confirm against the full file.
66 cmd = '%(xbuild)s%(variant)s -t %(function)s -c %(bytes)s -l %(loops)s -a %(alignment)s -r %(run_id)s' % locals()
# Run the benchmark binary and capture its single-line result record.
69 got = subprocess.check_output(cmd.split()).strip()
# Fail loudly if the executable could not be run (the surrounding
# try/except is outside this excerpt).
71 assert False, 'Error %s while running %s' % (ex, cmd)
# The record is colon-separated; field index 7 is the elapsed time,
# which (per the original docstring's contract) is the value of interest.
73 parts = got.split(':')
74 took = float(parts[7])
84 def run_many(cache, variants, bytes, all_functions):
85 # We want the data to come out in a useful order. So fix an
86 # alignment and function, and do all sizes for a variant first
# Pick a representative mid-range size (about 2/3 through the size list)
# for the calibration run below.
88 mid = bytes[int(len(bytes)/1.5)]
91 # Use the ordering in 'this' as the default
92 all_functions = HAS['this'].split()
94 # Find all other functions
# Append any routine some other variant provides that 'this' lacks,
# preserving first-seen order.
95 for functions in HAS.values():
96 for function in functions.split():
97 if function not in all_functions:
98 all_functions.append(function)
# Main sweep order: function -> alignment -> variant -> size -> run id.
100 for function in all_functions:
101 for alignment in ALIGNMENTS[function]:
102 for variant in variants:
# Skip variants that do not implement this routine.
103 if function not in HAS[variant].split():
106 # Run a tracer through and see how long it takes and
107 # adjust the number of loops based on that. Not great
108 # for memchr() and similar which are O(n), but it will
# Loop count scales as 1/sqrt(size) so total runtime stays roughly flat.
113 loops = int(f / math.sqrt(max(1, mid)))
114 took = run(cache, variant, function, mid, loops, alignment, 0,
116 # Keep it reasonable for silly routines like bounce
# Clamp the rescale factor to [0.05, 20] of the target time 'want'.
117 factor = min(20, max(0.05, want/took))
120 # Round f to a few significant figures
121 scale = 10**int(math.log10(f) - 1)
122 f = scale*int(f/scale)
# Timed sweep: every requested size, NUM_RUNS repetitions each, all
# recorded into the shared cache by run().
124 for b in sorted(bytes):
125 # Figure out the number of loops to give a roughly consistent run
126 loops = int(f / math.sqrt(max(1, b)))
127 for run_id in range(0, NUM_RUNS):
128 run(cache, variant, function, b, loops, alignment,
# Command-line interface: restrict which variants/functions run and cap the
# maximum buffer size tested.
# NOTE(review): the enclosing "def main():" line is outside this excerpt.
132 parser = argparse.ArgumentParser()
133 parser.add_argument("-v", "--variants", nargs="+", help="library variant to run (run all if not specified)", default = VARIANTS, choices = VARIANTS)
134 parser.add_argument("-f", "--functions", nargs="+", help="function to run (run all if not specified)", default = FUNCTIONS, choices = FUNCTIONS)
135 parser.add_argument("-l", "--limit", type=int, help="upper limit to test to (in bytes)", default = 512*1024)
136 args = parser.parse_args()
138 # Test all powers of 2
140 # Test intermediate powers of 1.4
# Build the size list: for each step base, every integral power of the base
# from step**0 up to roughly args.limit. The definitions of step1/step2 and
# the 'bytes'/'cache' initialisers are outside this excerpt.
145 for step in [step1, step2]:
147 # Figure out how many steps get us up to the top
148 steps = int(round(math.log(args.limit) / math.log(step)))
149 bytes.extend([int(step**x) for x in range(0, steps+1)])
# Drive the full benchmark sweep, reusing previously cached records.
151 run_many(cache, args.variants, bytes, args.functions)
# File used to persist benchmark records between invocations.
154 cachename = 'cache.txt'
# Load previous results: each line is keyed by its first seven
# colon-separated fields.
# NOTE(review): run() builds its cache keys from six fields, while this
# slice keeps seven -- presumably the emitted record carries one extra
# leading field; confirm the two key formats actually agree.
159 with open(cachename) as f:
162 parts = line.split(':')
163 cache[':'.join(parts[:7])] = line
# Rewrite the cache file with every record, sorted for stable diffs.
170 with open(cachename, 'w') as f:
171 for line in sorted(cache.values()):
174 if __name__ == '__main__':