3 # ################################################################
4 # Copyright (c) 2016-present, Facebook, Inc.
7 # This source code is licensed under both the BSD-style license (found in the
8 # LICENSE file in the root directory of this source tree) and the GPLv2 (found
9 # in the COPYING file in the root directory of this source tree).
10 # ##########################################################################
24 return os.path.abspath(os.path.join(a, *p))
27 class InputType(object):
33 class FrameType(object):
class TargetInfo(object):
    """
    Describes a fuzz target: the kind of input it consumes and the frame
    type it works with (FrameType.ZSTD unless stated otherwise).
    """

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        # Plain record; both values are stored exactly as given.
        self.input_type, self.frame_type = input_type, frame_type
45 FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
46 CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
48 'simple_round_trip': TargetInfo(InputType.RAW_DATA),
49 'stream_round_trip': TargetInfo(InputType.RAW_DATA),
50 'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
51 'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
52 'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
53 'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
54 'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
55 'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
56 'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
57 'simple_compress': TargetInfo(InputType.RAW_DATA),
58 'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
60 TARGETS = list(TARGET_INFO.keys())
61 ALL_TARGETS = TARGETS + ['all']
62 FUZZ_RNG_SEED_SIZE = 4
64 # Standard environment variables
65 CC = os.environ.get('CC', 'cc')
66 CXX = os.environ.get('CXX', 'c++')
67 CPPFLAGS = os.environ.get('CPPFLAGS', '')
68 CFLAGS = os.environ.get('CFLAGS', '-O3')
69 CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
70 LDFLAGS = os.environ.get('LDFLAGS', '')
71 MFLAGS = os.environ.get('MFLAGS', '-j')
73 # Fuzzing environment variables
74 LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
75 AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
76 DECODECORPUS = os.environ.get('DECODECORPUS',
77 abs_join(FUZZ_DIR, '..', 'decodecorpus'))
78 ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))
80 # Sanitizer environment variables
81 MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
82 MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
83 MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
84 MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
88 d = os.path.abspath(r)
89 if not os.path.isdir(d):
95 d = os.path.abspath(r)
96 if not os.path.isdir(d):
101 @contextlib.contextmanager
103 dirpath = tempfile.mkdtemp()
107 shutil.rmtree(dirpath, ignore_errors=True)
110 def parse_targets(in_targets):
112 for target in in_targets:
116 targets = targets.union(TARGETS)
117 elif target in TARGETS:
120 raise RuntimeError('{} is not a valid target'.format(target))
124 def targets_parser(args, description):
125 parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
130 help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
131 args, extra = parser.parse_known_args(args)
134 args.TARGET = parse_targets(args.TARGET)
def parse_env_flags(args, flags):
    """
    Look for flags set by environment variables.

    Scans the `flags` string for -fsanitize=... and -fno-sanitize=... options
    and updates args.asan, args.msan and args.ubsan accordingly; a sanitizer
    that is not mentioned keeps the value already stored on `args`.
    args.sanitize is set to whether any of the three ends up enabled.

    Returns the same `args` object.
    Raises RuntimeError if a sanitizer is both enabled and disabled.
    """
    # Every comma separated name passed to any -fsanitize= / -fno-sanitize=
    enabled = set(','.join(
        re.findall('-fsanitize=((?:[a-z]+,?)+)', flags)).split(','))
    disabled = set(','.join(
        re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags)).split(','))

    def set_sanitizer(sanitizer, default, san, nosan):
        wanted = sanitizer in san
        unwanted = sanitizer in nosan
        if wanted and unwanted:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if wanted or unwanted:
            # Exactly one of the two holds here, so `wanted` is the answer
            return wanted
        return default

    for attr, sanitizer in (('asan', 'address'),
                            ('msan', 'memory'),
                            ('ubsan', 'undefined')):
        current = getattr(args, attr)
        setattr(args, attr, set_sanitizer(sanitizer, current, enabled, disabled))

    args.sanitize = args.asan or args.msan or args.ubsan

    return args
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs `cc --version` and `cxx --version` and inspects their output.

    Returns a (compiler, version) tuple. `compiler` is 'clang', 'gcc', or
    None when neither name appears in the C compiler's output. `version` is
    a (major, minor, patch) tuple of ints taken from the first X.Y.Z number
    in the C compiler's output, or None when `compiler` is None.

    Raises RuntimeError when a compiler is recognized but its output holds
    no X.Y.Z version number. subprocess.check_output raises
    CalledProcessError if either command exits with a non-zero status.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    if b'clang' in cc_version_bytes:
        assert(b'clang' in cxx_version_bytes)
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes:
        assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
        compiler = 'gcc'
    if compiler is not None:
        # The quantifier has to be inside each group: `([0-9])+` only keeps
        # the last digit matched, which turns 10.2.1 into (0, 2, 1).
        version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is None:
            raise RuntimeError(
                'Unable to find a version number in the output of '
                '{} --version'.format(cc))
        version = tuple(int(version_match.group(i)) for i in range(1, 4))
    return compiler, version
def overflow_ubsan_flags(cc, cxx):
    """
    Returns the list of -fno-sanitize flags to use for the detected compiler:
    signed-integer-overflow for gcc, pointer-overflow for clang 5.0.0 or
    newer, and nothing otherwise.
    """
    compiler, version = compiler_version(cc, cxx)
    flags = []
    if compiler == 'gcc':
        flags.append('-fno-sanitize=signed-integer-overflow')
    elif compiler == 'clang' and version >= (5, 0, 0):
        flags.append('-fno-sanitize=pointer-overflow')
    return flags
199 def build_parser(args):
201 Cleans the repository and builds a fuzz target (or all).
202 Many flags default to environment variables (default says $X='y').
203 Options that aren't enabling features default to the correct values for
205 Enable sanitizers with --enable-*san.
206 For regression testing just build.
207 For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
208 For AFL set CC and CXX to AFL's compilers and set
209 LIB_FUZZING_ENGINE='libregression.a'.
211 parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
213 '--lib-fuzzing-engine',
214 dest='lib_fuzzing_engine',
216 default=LIB_FUZZING_ENGINE,
217 help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
218 "(default: $LIB_FUZZING_ENGINE='{})".format(LIB_FUZZING_ENGINE)))
220 fuzz_group = parser.add_mutually_exclusive_group()
221 fuzz_group.add_argument(
225 help='Enable coverage instrumentation (-fsanitize-coverage)')
226 fuzz_group.add_argument(
230 help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
231 'LIB_FUZZING_ENGINE is ignored')
235 '--enable-asan', dest='asan', action='store_true', help='Enable UBSAN')
242 '--enable-ubsan-pointer-overflow',
243 dest='ubsan_pointer_overflow',
245 help='Enable UBSAN pointer overflow check (known failure)')
247 '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
249 '--enable-msan-track-origins', dest='msan_track_origins',
250 action='store_true', help='Enable MSAN origin tracking')
252 '--msan-extra-cppflags',
253 dest='msan_extra_cppflags',
255 default=MSAN_EXTRA_CPPFLAGS,
256 help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
257 format(MSAN_EXTRA_CPPFLAGS))
259 '--msan-extra-cflags',
260 dest='msan_extra_cflags',
262 default=MSAN_EXTRA_CFLAGS,
263 help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
266 '--msan-extra-cxxflags',
267 dest='msan_extra_cxxflags',
269 default=MSAN_EXTRA_CXXFLAGS,
270 help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
271 format(MSAN_EXTRA_CXXFLAGS))
273 '--msan-extra-ldflags',
274 dest='msan_extra_ldflags',
276 default=MSAN_EXTRA_LDFLAGS,
277 help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
278 format(MSAN_EXTRA_LDFLAGS))
280 '--enable-sanitize-recover',
281 dest='sanitize_recover',
283 help='Non-fatal sanitizer errors where possible')
289 help='Set DEBUGLEVEL (default: 1)')
291 '--force-memory-access',
292 dest='memory_access',
295 help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
297 '--fuzz-rng-seed-size',
298 dest='fuzz_rng_seed_size',
301 help='Set FUZZ_RNG_SEED_SIZE (default: 4)')
303 '--disable-fuzzing-mode',
305 action='store_false',
306 help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
308 '--enable-stateful-fuzzing',
309 dest='stateful_fuzzing',
311 help='Reuse contexts between runs (makes reproduction impossible)')
317 help="CC (default: $CC='{}')".format(CC))
323 help="CXX (default: $CXX='{}')".format(CXX))
329 help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
335 help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
341 help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
347 help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
353 help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
358 help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
360 args = parser.parse_args(args)
361 args = parse_env_flags(args, ' '.join(
362 [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))
364 # Check option sanity
365 if args.msan and (args.asan or args.ubsan):
366 raise RuntimeError('MSAN may not be used with any other sanitizers')
367 if args.msan_track_origins and not args.msan:
368 raise RuntimeError('--enable-msan-track-origins requires MSAN')
369 if args.ubsan_pointer_overflow and not args.ubsan:
370 raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
371 if args.sanitize_recover and not args.sanitize:
372 raise RuntimeError('--enable-sanitize-recover but no sanitizers used')
379 args = build_parser(args)
380 except Exception as e:
383 # The compilation flags we are setting
384 targets = args.TARGET
387 cppflags = shlex.split(args.cppflags)
388 cflags = shlex.split(args.cflags)
389 ldflags = shlex.split(args.ldflags)
390 cxxflags = shlex.split(args.cxxflags)
391 mflags = shlex.split(args.mflags)
392 # Flags to be added to both cflags and cxxflags
396 '-DDEBUGLEVEL={}'.format(args.debug),
397 '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
398 '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
401 # Set flags for options
402 assert not (args.fuzzer and args.coverage)
405 '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp'
408 common_flags += ['-fsanitize=fuzzer']
409 args.lib_fuzzing_engine = ''
411 mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)]
413 if args.sanitize_recover:
414 recover_flags = ['-fsanitize-recover=all']
416 recover_flags = ['-fno-sanitize-recover=all']
418 common_flags += recover_flags
421 msan_flags = ['-fsanitize=memory']
422 if args.msan_track_origins:
423 msan_flags += ['-fsanitize-memory-track-origins']
424 common_flags += msan_flags
425 # Append extra MSAN flags (it might require special setup)
426 cppflags += [args.msan_extra_cppflags]
427 cflags += [args.msan_extra_cflags]
428 cxxflags += [args.msan_extra_cxxflags]
429 ldflags += [args.msan_extra_ldflags]
432 common_flags += ['-fsanitize=address']
435 ubsan_flags = ['-fsanitize=undefined']
436 if not args.ubsan_pointer_overflow:
437 ubsan_flags += overflow_ubsan_flags(cc, cxx)
438 common_flags += ubsan_flags
440 if args.stateful_fuzzing:
441 cppflags += ['-DSTATEFUL_FUZZING']
443 if args.fuzzing_mode:
444 cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']
446 if args.lib_fuzzing_engine == 'libregression.a':
447 targets = ['libregression.a'] + targets
449 # Append the common flags
450 cflags += common_flags
451 cxxflags += common_flags
453 # Prepare the flags for Make
454 cc_str = "CC={}".format(cc)
455 cxx_str = "CXX={}".format(cxx)
456 cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags))
457 cflags_str = "CFLAGS={}".format(' '.join(cflags))
458 cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags))
459 ldflags_str = "LDFLAGS={}".format(' '.join(ldflags))
462 print('MFLAGS={}'.format(' '.join(mflags)))
471 clean_cmd = ['make', 'clean'] + mflags
472 print(' '.join(clean_cmd))
473 subprocess.check_call(clean_cmd)
483 print(' '.join(build_cmd))
484 subprocess.check_call(build_cmd)
488 def libfuzzer_parser(args):
490 Runs a libfuzzer binary.
491 Passes all extra arguments to libfuzzer.
492 The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
494 Generates output in the CORPORA directory, puts crashes in the ARTIFACT
495 directory, and takes extra input from the SEED directory.
496 To merge AFL's output pass the SEED as AFL's output directory and pass
499 parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
503 help='Override the default corpora dir (default: {})'.format(
504 abs_join(CORPORA_DIR, 'TARGET')))
508 help='Override the default artifact dir (default: {})'.format(
509 abs_join(CORPORA_DIR, 'TARGET-crash')))
513 help='Override the default seed dir (default: {})'.format(
514 abs_join(CORPORA_DIR, 'TARGET-seed')))
518 help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
519 args, extra = parser.parse_known_args(args)
522 if args.TARGET and args.TARGET not in TARGETS:
523 raise RuntimeError('{} is not a valid target'.format(args.TARGET))
528 def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
530 corpora = abs_join(CORPORA_DIR, target)
532 artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
534 seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
535 if extra_args is None:
538 target = abs_join(FUZZ_DIR, target)
540 corpora = [create(corpora)]
541 artifact = create(artifact)
544 corpora += [artifact]
548 cmd = [target, '-artifact_prefix={}/'.format(artifact)]
549 cmd += corpora + extra_args
551 subprocess.check_call(cmd)
554 def libfuzzer_cmd(args):
556 args = libfuzzer_parser(args)
557 except Exception as e:
560 libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra)
564 def afl_parser(args):
566 Runs an afl-fuzz job.
567 Passes all extra arguments to afl-fuzz.
568 The fuzzer should have been built with CC/CXX set to the AFL compilers,
569 and with LIB_FUZZING_ENGINE='libregression.a'.
570 Takes input from CORPORA and writes output to OUTPUT.
571 Uses AFL_FUZZ as the binary (set from flag or environment variable).
573 parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
577 help='Override the default corpora dir (default: {})'.format(
578 abs_join(CORPORA_DIR, 'TARGET')))
582 help='Override the default AFL output dir (default: {})'.format(
583 abs_join(CORPORA_DIR, 'TARGET-afl')))
588 help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
592 help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
593 args, extra = parser.parse_known_args(args)
596 if args.TARGET and args.TARGET not in TARGETS:
597 raise RuntimeError('{} is not a valid target'.format(args.TARGET))
600 args.corpora = abs_join(CORPORA_DIR, args.TARGET)
602 args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET))
609 args = afl_parser(args)
610 except Exception as e:
613 target = abs_join(FUZZ_DIR, args.TARGET)
615 corpora = create(args.corpora)
616 output = create(args.output)
618 cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra
619 cmd += [target, '@@']
625 def regression(args):
628 Runs one or more regression tests.
629 The fuzzer should have been built with with
630 LIB_FUZZING_ENGINE='libregression.a'.
631 Takes input from CORPORA.
633 args = targets_parser(args, description)
634 except Exception as e:
637 for target in args.TARGET:
638 corpora = create(abs_join(CORPORA_DIR, target))
639 target = abs_join(FUZZ_DIR, target)
640 cmd = [target, corpora]
642 subprocess.check_call(cmd)
646 def gen_parser(args):
648 Generate a seed corpus appropriate for TARGET with data generated with
650 The fuzz inputs are prepended with a seed before the zstd data, so the
651 output of decodecorpus shouldn't be used directly.
652 Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
653 puts the output in SEED.
654 DECODECORPUS is the decodecorpus binary, and must already be built.
656 parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
662 help='Number of samples to generate')
667 help='Maximum sample size to generate')
671 help='Override the default seed dir (default: {})'.format(
672 abs_join(CORPORA_DIR, 'TARGET-seed')))
676 default=DECODECORPUS,
677 help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
683 help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
685 '--fuzz-rng-seed-size',
688 help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)"
693 help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
694 args, extra = parser.parse_known_args(args)
697 if args.TARGET and args.TARGET not in TARGETS:
698 raise RuntimeError('{} is not a valid target'.format(args.TARGET))
701 args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))
703 if not os.path.isfile(args.decodecorpus):
704 raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
705 format(args.decodecorpus, abs_join(FUZZ_DIR, '..')))
712 args = gen_parser(args)
713 except Exception as e:
717 seed = create(args.seed)
718 with tmpdir() as compressed, tmpdir() as decompressed, tmpdir() as dict:
719 info = TARGET_INFO[args.TARGET]
721 if info.input_type == InputType.DICTIONARY_DATA:
722 number = max(args.number, 1000)
727 '-n{}'.format(args.number),
728 '-p{}/'.format(compressed),
729 '-o{}'.format(decompressed),
732 if info.frame_type == FrameType.BLOCK:
735 '--max-block-size-log={}'.format(min(args.max_size_log, 17))
738 cmd += ['--max-content-size-log={}'.format(args.max_size_log)]
741 subprocess.check_call(cmd)
743 if info.input_type == InputType.RAW_DATA:
744 print('using decompressed data in {}'.format(decompressed))
745 samples = decompressed
746 elif info.input_type == InputType.COMPRESSED_DATA:
747 print('using compressed data in {}'.format(compressed))
750 assert info.input_type == InputType.DICTIONARY_DATA
751 print('making dictionary data from {}'.format(decompressed))
753 min_dict_size_log = 9
754 max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
755 for dict_size_log in range(min_dict_size_log, max_dict_size_log):
756 dict_size = 1 << dict_size_log
761 '--maxdict={}'.format(dict_size),
762 '-o', abs_join(dict, '{}.zstd-dict'.format(dict_size))
765 subprocess.check_call(cmd)
767 # Copy the samples over and prepend the RNG seeds
768 for name in os.listdir(samples):
769 samplename = abs_join(samples, name)
770 outname = abs_join(seed, name)
771 with open(samplename, 'rb') as sample:
772 with open(outname, 'wb') as out:
774 chunk = sample.read(CHUNK_SIZE)
775 while len(chunk) > 0:
777 chunk = sample.read(CHUNK_SIZE)
784 Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
785 TARGET_seed_corpus. All extra args are passed to libfuzzer.
787 args = targets_parser(args, description)
788 except Exception as e:
792 for target in args.TARGET:
793 # Merge the corpus + anything else into the seed_corpus
794 corpus = abs_join(CORPORA_DIR, target)
795 seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
796 extra_args = [corpus, "-merge=1"] + args.extra
797 libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
798 seeds = set(os.listdir(seed_corpus))
799 # Copy all crashes directly into the seed_corpus if not already present
800 crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
801 for crash in os.listdir(crashes):
802 if crash not in seeds:
803 shutil.copy(abs_join(crashes, crash), seed_corpus)
810 Zips up the seed corpus.
812 args = targets_parser(args, description)
813 except Exception as e:
817 for target in args.TARGET:
818 # Zip the seed_corpus
819 seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
820 zip_file = "{}.zip".format(seed_corpus)
821 cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
823 subprocess.check_call(cmd, cwd=seed_corpus)
827 print("\n".join(TARGETS))
830 def short_help(args):
832 print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))
837 print("\tfuzzing helpers (select a command and pass -h for help)\n")
839 print("\t-h, --help\tPrint this message")
842 print("\tbuild\t\tBuild a fuzzer")
843 print("\tlibfuzzer\tRun a libFuzzer fuzzer")
844 print("\tafl\t\tRun an AFL fuzzer")
845 print("\tregression\tRun a regression test")
846 print("\tgen\t\tGenerate a seed corpus for a fuzzer")
847 print("\tminimize\tMinimize the test corpora")
848 print("\tzip\t\tZip the minimized corpora up")
849 print("\tlist\t\tList the available targets")
857 if args[1] == '-h' or args[1] == '--help' or args[1] == '-H':
860 command = args.pop(1)
861 args[0] = "{} {}".format(args[0], command)
862 if command == "build":
864 if command == "libfuzzer":
865 return libfuzzer_cmd(args)
866 if command == "regression":
867 return regression(args)
872 if command == "minimize":
873 return minimize(args)
876 if command == "list":
877 return list_cmd(args)
879 print("Error: No such command {} (pass -h for help)".format(command))
883 if __name__ == "__main__":