tests/fuzz/fuzz.py

   1 #!/usr/bin/env python
   2
   3 # ################################################################
   4 # Copyright (c) 2016-present, Facebook, Inc.
   5 # All rights reserved.
   6 #
   7 # This source code is licensed under both the BSD-style license (found in the
   8 # LICENSE file in the root directory of this source tree) and the GPLv2 (found
   9 # in the COPYING file in the root directory of this source tree).
  10 # ##########################################################################
  11
  12 import argparse
  13 import contextlib
  14 import os
  15 import re
  16 import shlex
  17 import shutil
  18 import subprocess
  19 import sys
  20 import tempfile
  21
  22
  23 def abs_join(a, *p):
  24     return os.path.abspath(os.path.join(a, *p))
  25
  26
  27 class InputType(object):
  28     RAW_DATA = 1
  29     COMPRESSED_DATA = 2
  30     DICTIONARY_DATA = 3
  31
  32
  33 class FrameType(object):
  34     ZSTD = 1
  35     BLOCK = 2
  36
  37
  38 class TargetInfo(object):
  39     def __init__(self, input_type, frame_type=FrameType.ZSTD):
  40         self.input_type = input_type
  41         self.frame_type = frame_type
  42
  43
# Constants
# Directory that contains this script.
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
# Root directory under which per-target corpora directories are placed.
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
# Maps each fuzz target name to the input type / frame type it works with.
# gen() looks targets up here to decide which generated samples to keep.
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
}
# Names of all known fuzz targets.
TARGETS = list(TARGET_INFO.keys())
# Target names accepted on the command line, including the 'all' shorthand.
ALL_TARGETS = TARGETS + ['all']
# Presumably the default for the --fuzz-rng-seed-size options (both use 4).
FUZZ_RNG_SEED_SIZE = 4
  63
# Standard environment variables
# Each value is read once at import time; the second argument is the fallback
# used when the variable is not set.
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
# CXXFLAGS falls back to whatever CFLAGS resolved to above.
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
# Default decodecorpus location: one directory above this script.
DECODECORPUS = os.environ.get('DECODECORPUS',
                              abs_join(FUZZ_DIR, '..', 'decodecorpus'))
# Default zstd binary location: two directories above this script.
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
# Extra flags that build() appends only when MSAN is enabled.
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
  85
  86
  87 def create(r):
  88     d = os.path.abspath(r)
  89     if not os.path.isdir(d):
  90         os.makedirs(d)
  91     return d
  92
  93
  94 def check(r):
  95     d = os.path.abspath(r)
  96     if not os.path.isdir(d):
  97         return None
  98     return d
  99
 100
 101 @contextlib.contextmanager
 102 def tmpdir():
 103     dirpath = tempfile.mkdtemp()
 104     try:
 105         yield dirpath
 106     finally:
 107         shutil.rmtree(dirpath, ignore_errors=True)
 108
 109
 110 def parse_targets(in_targets):
 111     targets = set()
 112     for target in in_targets:
 113         if not target:
 114             continue
 115         if target == 'all':
 116             targets = targets.union(TARGETS)
 117         elif target in TARGETS:
 118             targets.add(target)
 119         else:
 120             raise RuntimeError('{} is not a valid target'.format(target))
 121     return list(targets)
 122
 123
 124 def targets_parser(args, description):
 125     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 126     parser.add_argument(
 127         'TARGET',
 128         nargs='*',
 129         type=str,
 130         help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
 131     args, extra = parser.parse_known_args(args)
 132     args.extra = extra
 133
 134     args.TARGET = parse_targets(args.TARGET)
 135
 136     return args
 137
 138
 139 def parse_env_flags(args, flags):
 140     """
 141     Look for flags set by environment variables.
 142     """
 143     san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags))
 144     nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags))
 145
 146     def set_sanitizer(sanitizer, default, san, nosan):
 147         if sanitizer in san and sanitizer in nosan:
 148             raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
 149                                format(s=sanitizer))
 150         if sanitizer in san:
 151             return True
 152         if sanitizer in nosan:
 153             return False
 154         return default
 155
 156     san = set(san_flags.split(','))
 157     nosan = set(nosan_flags.split(','))
 158
 159     args.asan = set_sanitizer('address', args.asan, san, nosan)
 160     args.msan = set_sanitizer('memory', args.msan, san, nosan)
 161     args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan)
 162
 163     args.sanitize = args.asan or args.msan or args.ubsan
 164
 165     return args
 166
 167
 168 def compiler_version(cc, cxx):
 169     """
 170     Determines the compiler and version.
 171     Only works for clang and gcc.
 172     """
 173     cc_version_bytes = subprocess.check_output([cc, "--version"])
 174     cxx_version_bytes = subprocess.check_output([cxx, "--version"])
 175     compiler = None
 176     version = None
 177     if b'clang' in cc_version_bytes:
 178         assert(b'clang' in cxx_version_bytes)
 179         compiler = 'clang'
 180     elif b'gcc' in cc_version_bytes:
 181         assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
 182         compiler = 'gcc'
 183     if compiler is not None:
 184         version_regex = b'([0-9])+\.([0-9])+\.([0-9])+'
 185         version_match = re.search(version_regex, cc_version_bytes)
 186         version = tuple(int(version_match.group(i)) for i in range(1, 4))
 187     return compiler, version
 188
 189
 190 def overflow_ubsan_flags(cc, cxx):
 191     compiler, version = compiler_version(cc, cxx)
 192     if compiler == 'gcc':
 193         return ['-fno-sanitize=signed-integer-overflow']
 194     if compiler == 'clang' and version >= (5, 0, 0):
 195         return ['-fno-sanitize=pointer-overflow']
 196     return []
 197
 198
 199 def build_parser(args):
 200     description = """
 201     Cleans the repository and builds a fuzz target (or all).
 202     Many flags default to environment variables (default says $X='y').
 203     Options that aren't enabling features default to the correct values for
 204     zstd.
 205     Enable sanitizers with --enable-*san.
 206     For regression testing just build.
 207     For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
 208     For AFL set CC and CXX to AFL's compilers and set
 209     LIB_FUZZING_ENGINE='libregression.a'.
 210     """
 211     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 212     parser.add_argument(
 213         '--lib-fuzzing-engine',
 214         dest='lib_fuzzing_engine',
 215         type=str,
 216         default=LIB_FUZZING_ENGINE,
 217         help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
 218               "(default: $LIB_FUZZING_ENGINE='{})".format(LIB_FUZZING_ENGINE)))
 219
 220     fuzz_group = parser.add_mutually_exclusive_group()
 221     fuzz_group.add_argument(
 222         '--enable-coverage',
 223         dest='coverage',
 224         action='store_true',
 225         help='Enable coverage instrumentation (-fsanitize-coverage)')
 226     fuzz_group.add_argument(
 227         '--enable-fuzzer',
 228         dest='fuzzer',
 229         action='store_true',
 230         help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
 231               'LIB_FUZZING_ENGINE is ignored')
 232     )
 233
 234     parser.add_argument(
 235         '--enable-asan', dest='asan', action='store_true', help='Enable UBSAN')
 236     parser.add_argument(
 237         '--enable-ubsan',
 238         dest='ubsan',
 239         action='store_true',
 240         help='Enable UBSAN')
 241     parser.add_argument(
 242         '--enable-ubsan-pointer-overflow',
 243         dest='ubsan_pointer_overflow',
 244         action='store_true',
 245         help='Enable UBSAN pointer overflow check (known failure)')
 246     parser.add_argument(
 247         '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
 248     parser.add_argument(
 249         '--enable-msan-track-origins', dest='msan_track_origins',
 250         action='store_true', help='Enable MSAN origin tracking')
 251     parser.add_argument(
 252         '--msan-extra-cppflags',
 253         dest='msan_extra_cppflags',
 254         type=str,
 255         default=MSAN_EXTRA_CPPFLAGS,
 256         help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
 257         format(MSAN_EXTRA_CPPFLAGS))
 258     parser.add_argument(
 259         '--msan-extra-cflags',
 260         dest='msan_extra_cflags',
 261         type=str,
 262         default=MSAN_EXTRA_CFLAGS,
 263         help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
 264             MSAN_EXTRA_CFLAGS))
 265     parser.add_argument(
 266         '--msan-extra-cxxflags',
 267         dest='msan_extra_cxxflags',
 268         type=str,
 269         default=MSAN_EXTRA_CXXFLAGS,
 270         help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
 271         format(MSAN_EXTRA_CXXFLAGS))
 272     parser.add_argument(
 273         '--msan-extra-ldflags',
 274         dest='msan_extra_ldflags',
 275         type=str,
 276         default=MSAN_EXTRA_LDFLAGS,
 277         help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
 278         format(MSAN_EXTRA_LDFLAGS))
 279     parser.add_argument(
 280         '--enable-sanitize-recover',
 281         dest='sanitize_recover',
 282         action='store_true',
 283         help='Non-fatal sanitizer errors where possible')
 284     parser.add_argument(
 285         '--debug',
 286         dest='debug',
 287         type=int,
 288         default=1,
 289         help='Set DEBUGLEVEL (default: 1)')
 290     parser.add_argument(
 291         '--force-memory-access',
 292         dest='memory_access',
 293         type=int,
 294         default=0,
 295         help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
 296     parser.add_argument(
 297         '--fuzz-rng-seed-size',
 298         dest='fuzz_rng_seed_size',
 299         type=int,
 300         default=4,
 301         help='Set FUZZ_RNG_SEED_SIZE (default: 4)')
 302     parser.add_argument(
 303         '--disable-fuzzing-mode',
 304         dest='fuzzing_mode',
 305         action='store_false',
 306         help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
 307     parser.add_argument(
 308         '--enable-stateful-fuzzing',
 309         dest='stateful_fuzzing',
 310         action='store_true',
 311         help='Reuse contexts between runs (makes reproduction impossible)')
 312     parser.add_argument(
 313         '--cc',
 314         dest='cc',
 315         type=str,
 316         default=CC,
 317         help="CC (default: $CC='{}')".format(CC))
 318     parser.add_argument(
 319         '--cxx',
 320         dest='cxx',
 321         type=str,
 322         default=CXX,
 323         help="CXX (default: $CXX='{}')".format(CXX))
 324     parser.add_argument(
 325         '--cppflags',
 326         dest='cppflags',
 327         type=str,
 328         default=CPPFLAGS,
 329         help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
 330     parser.add_argument(
 331         '--cflags',
 332         dest='cflags',
 333         type=str,
 334         default=CFLAGS,
 335         help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
 336     parser.add_argument(
 337         '--cxxflags',
 338         dest='cxxflags',
 339         type=str,
 340         default=CXXFLAGS,
 341         help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
 342     parser.add_argument(
 343         '--ldflags',
 344         dest='ldflags',
 345         type=str,
 346         default=LDFLAGS,
 347         help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
 348     parser.add_argument(
 349         '--mflags',
 350         dest='mflags',
 351         type=str,
 352         default=MFLAGS,
 353         help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
 354     parser.add_argument(
 355         'TARGET',
 356         nargs='*',
 357         type=str,
 358         help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
 359     )
 360     args = parser.parse_args(args)
 361     args = parse_env_flags(args, ' '.join(
 362         [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))
 363
 364     # Check option sanity
 365     if args.msan and (args.asan or args.ubsan):
 366         raise RuntimeError('MSAN may not be used with any other sanitizers')
 367     if args.msan_track_origins and not args.msan:
 368         raise RuntimeError('--enable-msan-track-origins requires MSAN')
 369     if args.ubsan_pointer_overflow and not args.ubsan:
 370         raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
 371     if args.sanitize_recover and not args.sanitize:
 372         raise RuntimeError('--enable-sanitize-recover but no sanitizers used')
 373
 374     return args
 375
 376
 377 def build(args):
 378     try:
 379         args = build_parser(args)
 380     except Exception as e:
 381         print(e)
 382         return 1
 383     # The compilation flags we are setting
 384     targets = args.TARGET
 385     cc = args.cc
 386     cxx = args.cxx
 387     cppflags = shlex.split(args.cppflags)
 388     cflags = shlex.split(args.cflags)
 389     ldflags = shlex.split(args.ldflags)
 390     cxxflags = shlex.split(args.cxxflags)
 391     mflags = shlex.split(args.mflags)
 392     # Flags to be added to both cflags and cxxflags
 393     common_flags = []
 394
 395     cppflags += [
 396         '-DDEBUGLEVEL={}'.format(args.debug),
 397         '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
 398         '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
 399     ]
 400
 401     # Set flags for options
 402     assert not (args.fuzzer and args.coverage)
 403     if args.coverage:
 404         common_flags += [
 405             '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp'
 406         ]
 407     if args.fuzzer:
 408         common_flags += ['-fsanitize=fuzzer']
 409         args.lib_fuzzing_engine = ''
 410
 411     mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)]
 412
 413     if args.sanitize_recover:
 414         recover_flags = ['-fsanitize-recover=all']
 415     else:
 416         recover_flags = ['-fno-sanitize-recover=all']
 417     if args.sanitize:
 418         common_flags += recover_flags
 419
 420     if args.msan:
 421         msan_flags = ['-fsanitize=memory']
 422         if args.msan_track_origins:
 423             msan_flags += ['-fsanitize-memory-track-origins']
 424         common_flags += msan_flags
 425         # Append extra MSAN flags (it might require special setup)
 426         cppflags += [args.msan_extra_cppflags]
 427         cflags += [args.msan_extra_cflags]
 428         cxxflags += [args.msan_extra_cxxflags]
 429         ldflags += [args.msan_extra_ldflags]
 430
 431     if args.asan:
 432         common_flags += ['-fsanitize=address']
 433
 434     if args.ubsan:
 435         ubsan_flags = ['-fsanitize=undefined']
 436         if not args.ubsan_pointer_overflow:
 437             ubsan_flags += overflow_ubsan_flags(cc, cxx)
 438         common_flags += ubsan_flags
 439
 440     if args.stateful_fuzzing:
 441         cppflags += ['-DSTATEFUL_FUZZING']
 442
 443     if args.fuzzing_mode:
 444         cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']
 445
 446     if args.lib_fuzzing_engine == 'libregression.a':
 447         targets = ['libregression.a'] + targets
 448
 449     # Append the common flags
 450     cflags += common_flags
 451     cxxflags += common_flags
 452
 453     # Prepare the flags for Make
 454     cc_str = "CC={}".format(cc)
 455     cxx_str = "CXX={}".format(cxx)
 456     cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags))
 457     cflags_str = "CFLAGS={}".format(' '.join(cflags))
 458     cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags))
 459     ldflags_str = "LDFLAGS={}".format(' '.join(ldflags))
 460
 461     # Print the flags
 462     print('MFLAGS={}'.format(' '.join(mflags)))
 463     print(cc_str)
 464     print(cxx_str)
 465     print(cppflags_str)
 466     print(cflags_str)
 467     print(cxxflags_str)
 468     print(ldflags_str)
 469
 470     # Clean and build
 471     clean_cmd = ['make', 'clean'] + mflags
 472     print(' '.join(clean_cmd))
 473     subprocess.check_call(clean_cmd)
 474     build_cmd = [
 475         'make',
 476         cc_str,
 477         cxx_str,
 478         cppflags_str,
 479         cflags_str,
 480         cxxflags_str,
 481         ldflags_str,
 482     ] + mflags + targets
 483     print(' '.join(build_cmd))
 484     subprocess.check_call(build_cmd)
 485     return 0
 486
 487
 488 def libfuzzer_parser(args):
 489     description = """
 490     Runs a libfuzzer binary.
 491     Passes all extra arguments to libfuzzer.
 492     The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
 493     libFuzzer.a.
 494     Generates output in the CORPORA directory, puts crashes in the ARTIFACT
 495     directory, and takes extra input from the SEED directory.
 496     To merge AFL's output pass the SEED as AFL's output directory and pass
 497     '-merge=1'.
 498     """
 499     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 500     parser.add_argument(
 501         '--corpora',
 502         type=str,
 503         help='Override the default corpora dir (default: {})'.format(
 504             abs_join(CORPORA_DIR, 'TARGET')))
 505     parser.add_argument(
 506         '--artifact',
 507         type=str,
 508         help='Override the default artifact dir (default: {})'.format(
 509             abs_join(CORPORA_DIR, 'TARGET-crash')))
 510     parser.add_argument(
 511         '--seed',
 512         type=str,
 513         help='Override the default seed dir (default: {})'.format(
 514             abs_join(CORPORA_DIR, 'TARGET-seed')))
 515     parser.add_argument(
 516         'TARGET',
 517         type=str,
 518         help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
 519     args, extra = parser.parse_known_args(args)
 520     args.extra = extra
 521
 522     if args.TARGET and args.TARGET not in TARGETS:
 523         raise RuntimeError('{} is not a valid target'.format(args.TARGET))
 524
 525     return args
 526
 527
 528 def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
 529     if corpora is None:
 530         corpora = abs_join(CORPORA_DIR, target)
 531     if artifact is None:
 532         artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
 533     if seed is None:
 534         seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
 535     if extra_args is None:
 536         extra_args = []
 537
 538     target = abs_join(FUZZ_DIR, target)
 539
 540     corpora = [create(corpora)]
 541     artifact = create(artifact)
 542     seed = check(seed)
 543
 544     corpora += [artifact]
 545     if seed is not None:
 546         corpora += [seed]
 547
 548     cmd = [target, '-artifact_prefix={}/'.format(artifact)]
 549     cmd += corpora + extra_args
 550     print(' '.join(cmd))
 551     subprocess.check_call(cmd)
 552
 553
 554 def libfuzzer_cmd(args):
 555     try:
 556         args = libfuzzer_parser(args)
 557     except Exception as e:
 558         print(e)
 559         return 1
 560     libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra)
 561     return 0
 562
 563
 564 def afl_parser(args):
 565     description = """
 566     Runs an afl-fuzz job.
 567     Passes all extra arguments to afl-fuzz.
 568     The fuzzer should have been built with CC/CXX set to the AFL compilers,
 569     and with LIB_FUZZING_ENGINE='libregression.a'.
 570     Takes input from CORPORA and writes output to OUTPUT.
 571     Uses AFL_FUZZ as the binary (set from flag or environment variable).
 572     """
 573     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 574     parser.add_argument(
 575         '--corpora',
 576         type=str,
 577         help='Override the default corpora dir (default: {})'.format(
 578             abs_join(CORPORA_DIR, 'TARGET')))
 579     parser.add_argument(
 580         '--output',
 581         type=str,
 582         help='Override the default AFL output dir (default: {})'.format(
 583             abs_join(CORPORA_DIR, 'TARGET-afl')))
 584     parser.add_argument(
 585         '--afl-fuzz',
 586         type=str,
 587         default=AFL_FUZZ,
 588         help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
 589     parser.add_argument(
 590         'TARGET',
 591         type=str,
 592         help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
 593     args, extra = parser.parse_known_args(args)
 594     args.extra = extra
 595
 596     if args.TARGET and args.TARGET not in TARGETS:
 597         raise RuntimeError('{} is not a valid target'.format(args.TARGET))
 598
 599     if not args.corpora:
 600         args.corpora = abs_join(CORPORA_DIR, args.TARGET)
 601     if not args.output:
 602         args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET))
 603
 604     return args
 605
 606
 607 def afl(args):
 608     try:
 609         args = afl_parser(args)
 610     except Exception as e:
 611         print(e)
 612         return 1
 613     target = abs_join(FUZZ_DIR, args.TARGET)
 614
 615     corpora = create(args.corpora)
 616     output = create(args.output)
 617
 618     cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra
 619     cmd += [target, '@@']
 620     print(' '.join(cmd))
 621     subprocess.call(cmd)
 622     return 0
 623
 624
 625 def regression(args):
 626     try:
 627         description = """
 628         Runs one or more regression tests.
 629         The fuzzer should have been built with with
 630         LIB_FUZZING_ENGINE='libregression.a'.
 631         Takes input from CORPORA.
 632         """
 633         args = targets_parser(args, description)
 634     except Exception as e:
 635         print(e)
 636         return 1
 637     for target in args.TARGET:
 638         corpora = create(abs_join(CORPORA_DIR, target))
 639         target = abs_join(FUZZ_DIR, target)
 640         cmd = [target, corpora]
 641         print(' '.join(cmd))
 642         subprocess.check_call(cmd)
 643     return 0
 644
 645
 646 def gen_parser(args):
 647     description = """
 648     Generate a seed corpus appropriate for TARGET with data generated with
 649     decodecorpus.
 650     The fuzz inputs are prepended with a seed before the zstd data, so the
 651     output of decodecorpus shouldn't be used directly.
 652     Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
 653     puts the output in SEED.
 654     DECODECORPUS is the decodecorpus binary, and must already be built.
 655     """
 656     parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
 657     parser.add_argument(
 658         '--number',
 659         '-n',
 660         type=int,
 661         default=100,
 662         help='Number of samples to generate')
 663     parser.add_argument(
 664         '--max-size-log',
 665         type=int,
 666         default=18,
 667         help='Maximum sample size to generate')
 668     parser.add_argument(
 669         '--seed',
 670         type=str,
 671         help='Override the default seed dir (default: {})'.format(
 672             abs_join(CORPORA_DIR, 'TARGET-seed')))
 673     parser.add_argument(
 674         '--decodecorpus',
 675         type=str,
 676         default=DECODECORPUS,
 677         help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
 678             DECODECORPUS))
 679     parser.add_argument(
 680         '--zstd',
 681         type=str,
 682         default=ZSTD,
 683         help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
 684     parser.add_argument(
 685         '--fuzz-rng-seed-size',
 686         type=int,
 687         default=4,
 688         help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)"
 689     )
 690     parser.add_argument(
 691         'TARGET',
 692         type=str,
 693         help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
 694     args, extra = parser.parse_known_args(args)
 695     args.extra = extra
 696
 697     if args.TARGET and args.TARGET not in TARGETS:
 698         raise RuntimeError('{} is not a valid target'.format(args.TARGET))
 699
 700     if not args.seed:
 701         args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))
 702
 703     if not os.path.isfile(args.decodecorpus):
 704         raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
 705                            format(args.decodecorpus, abs_join(FUZZ_DIR, '..')))
 706
 707     return args
 708
 709
 710 def gen(args):
 711     try:
 712         args = gen_parser(args)
 713     except Exception as e:
 714         print(e)
 715         return 1
 716
 717     seed = create(args.seed)
 718     with tmpdir() as compressed, tmpdir() as decompressed, tmpdir() as dict:
 719         info = TARGET_INFO[args.TARGET]
 720
 721         if info.input_type == InputType.DICTIONARY_DATA:
 722             number = max(args.number, 1000)
 723         else:
 724             number = args.number
 725         cmd = [
 726             args.decodecorpus,
 727             '-n{}'.format(args.number),
 728             '-p{}/'.format(compressed),
 729             '-o{}'.format(decompressed),
 730         ]
 731
 732         if info.frame_type == FrameType.BLOCK:
 733             cmd += [
 734                 '--gen-blocks',
 735                 '--max-block-size-log={}'.format(min(args.max_size_log, 17))
 736             ]
 737         else:
 738             cmd += ['--max-content-size-log={}'.format(args.max_size_log)]
 739
 740         print(' '.join(cmd))
 741         subprocess.check_call(cmd)
 742
 743         if info.input_type == InputType.RAW_DATA:
 744             print('using decompressed data in {}'.format(decompressed))
 745             samples = decompressed
 746         elif info.input_type == InputType.COMPRESSED_DATA:
 747             print('using compressed data in {}'.format(compressed))
 748             samples = compressed
 749         else:
 750             assert info.input_type == InputType.DICTIONARY_DATA
 751             print('making dictionary data from {}'.format(decompressed))
 752             samples = dict
 753             min_dict_size_log = 9
 754             max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
 755             for dict_size_log in range(min_dict_size_log, max_dict_size_log):
 756                 dict_size = 1 << dict_size_log
 757                 cmd = [
 758                     args.zstd,
 759                     '--train',
 760                     '-r', decompressed,
 761                     '--maxdict={}'.format(dict_size),
 762                     '-o', abs_join(dict, '{}.zstd-dict'.format(dict_size))
 763                 ]
 764                 print(' '.join(cmd))
 765                 subprocess.check_call(cmd)
 766
 767         # Copy the samples over and prepend the RNG seeds
 768         for name in os.listdir(samples):
 769             samplename = abs_join(samples, name)
 770             outname = abs_join(seed, name)
 771             with open(samplename, 'rb') as sample:
 772                 with open(outname, 'wb') as out:
 773                     CHUNK_SIZE = 131072
 774                     chunk = sample.read(CHUNK_SIZE)
 775                     while len(chunk) > 0:
 776                         out.write(chunk)
 777                         chunk = sample.read(CHUNK_SIZE)
 778     return 0
 779
 780
 781 def minimize(args):
 782     try:
 783         description = """
 784         Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
 785         TARGET_seed_corpus. All extra args are passed to libfuzzer.
 786         """
 787         args = targets_parser(args, description)
 788     except Exception as e:
 789         print(e)
 790         return 1
 791
 792     for target in args.TARGET:
 793         # Merge the corpus + anything else into the seed_corpus
 794         corpus = abs_join(CORPORA_DIR, target)
 795         seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
 796         extra_args = [corpus, "-merge=1"] + args.extra
 797         libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
 798         seeds = set(os.listdir(seed_corpus))
 799         # Copy all crashes directly into the seed_corpus if not already present
 800         crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
 801         for crash in os.listdir(crashes):
 802             if crash not in seeds:
 803                 shutil.copy(abs_join(crashes, crash), seed_corpus)
 804                 seeds.add(crash)
 805
 806
 807 def zip_cmd(args):
 808     try:
 809         description = """
 810         Zips up the seed corpus.
 811         """
 812         args = targets_parser(args, description)
 813     except Exception as e:
 814         print(e)
 815         return 1
 816
 817     for target in args.TARGET:
 818         # Zip the seed_corpus
 819         seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
 820         zip_file = "{}.zip".format(seed_corpus)
 821         cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
 822         print(' '.join(cmd))
 823         subprocess.check_call(cmd, cwd=seed_corpus)
 824
 825
 826 def list_cmd(args):
 827     print("\n".join(TARGETS))
 828
 829
 830 def short_help(args):
 831     name = args[0]
 832     print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))
 833
 834
 835 def help(args):
 836     short_help(args)
 837     print("\tfuzzing helpers (select a command and pass -h for help)\n")
 838     print("Options:")
 839     print("\t-h, --help\tPrint this message")
 840     print("")
 841     print("Commands:")
 842     print("\tbuild\t\tBuild a fuzzer")
 843     print("\tlibfuzzer\tRun a libFuzzer fuzzer")
 844     print("\tafl\t\tRun an AFL fuzzer")
 845     print("\tregression\tRun a regression test")
 846     print("\tgen\t\tGenerate a seed corpus for a fuzzer")
 847     print("\tminimize\tMinimize the test corpora")
 848     print("\tzip\t\tZip the minimized corpora up")
 849     print("\tlist\t\tList the available targets")
 850
 851
 852 def main():
 853     args = sys.argv
 854     if len(args) < 2:
 855         help(args)
 856         return 1
 857     if args[1] == '-h' or args[1] == '--help' or args[1] == '-H':
 858         help(args)
 859         return 1
 860     command = args.pop(1)
 861     args[0] = "{} {}".format(args[0], command)
 862     if command == "build":
 863         return build(args)
 864     if command == "libfuzzer":
 865         return libfuzzer_cmd(args)
 866     if command == "regression":
 867         return regression(args)
 868     if command == "afl":
 869         return afl(args)
 870     if command == "gen":
 871         return gen(args)
 872     if command == "minimize":
 873         return minimize(args)
 874     if command == "zip":
 875         return zip_cmd(args)
 876     if command == "list":
 877         return list_cmd(args)
 878     short_help(args)
 879     print("Error: No such command {} (pass -h for help)".format(command))
 880     return 1
 881
 882
 883 if __name__ == "__main__":
 884     sys.exit(main())