1 //===-- SpecialCaseList.h - special case list for sanitizers ----*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //===----------------------------------------------------------------------===//
9 // This is a utility class used to parse user-provided text files with
10 // "special case lists" for code sanitizers. Such files are used to
11 // define an "ABI list" for DataFlowSanitizer and blacklists for sanitizers
12 // like AddressSanitizer or UndefinedBehaviorSanitizer.
14 // Empty lines and lines starting with "#" are ignored. Sections are defined
15 // using a '[section_name]' header and can be used to specify sanitizers the
16 // entries below it apply to. Section names are regular expressions, and
17 // entries without a section header match all sections (e.g. an '[*]' header
// is assumed.)
19 // The remaining lines should have the form:
20 // prefix:wildcard_expression[=category]
21 // If category is not specified, it is assumed to be empty string.
22 // Definitions of "prefix" and "category" are sanitizer-specific. For example,
23 // sanitizer blacklists support prefixes "src", "fun" and "global".
24 // Wildcard expressions define, respectively, source files, functions or
25 // globals which shouldn't be instrumented.
26 // Examples of categories:
27 // "functional": used in DFSan to list functions with pure functional
// semantics.
29 // "init": used in ASan blacklist to disable initialization-order bugs
30 // detection for certain globals or source files.
31 // Full special case list file example:
34 // # Blacklisted items:
35 // fun:*_ZN4base6subtle*
36 // global:*global_with_bad_access_or_initialization*
37 // global:*global_with_initialization_issues*=init
38 // type:*Namespace::ClassName*=init
39 // src:file_with_tricky_code.cc
40 // src:ignore-global-initializers-issues.cc=init
43 // # Functions with pure functional semantics:
47 // Note that the wild card is in fact an llvm::Regex, but * is automatically
// replaced with .*
50 //===----------------------------------------------------------------------===//
52 #ifndef LLVM_SUPPORT_SPECIALCASELIST_H
53 #define LLVM_SUPPORT_SPECIALCASELIST_H
55 #include "llvm/ADT/StringMap.h"
56 #include "llvm/ADT/StringSet.h"
57 #include "llvm/Support/Regex.h"
58 #include "llvm/Support/TrigramIndex.h"
67 class SpecialCaseList {
69 /// Parses the special case list entries from the files named in @Paths.
70 /// On failure, returns nullptr and writes an error message to @Error.
71 static std::unique_ptr<SpecialCaseList>
72 create(const std::vector<std::string> &Paths, std::string &Error);
73 /// Parses the special case list from a memory buffer. On failure, returns
74 /// nullptr and writes an error message to the error string.
75 static std::unique_ptr<SpecialCaseList> create(const MemoryBuffer *MB,
77 /// Parses the special case list entries from the files named in @Paths. On
/// failure, reports a fatal error.
79 static std::unique_ptr<SpecialCaseList>
80 createOrDie(const std::vector<std::string> &Paths);
84 /// Returns true if the special case list contains a line
86 /// @Prefix:<E>=@Category
88 /// where @Query satisfies wildcard expression <E> in a given @Section.
/// @Category defaults to an empty StringRef when omitted.
89 bool inSection(StringRef Section, StringRef Prefix, StringRef Query,
90 StringRef Category = StringRef()) const;
92 /// Returns the line number corresponding to the special case list entry if
93 /// the special case list contains a line
95 /// @Prefix:<E>=@Category
97 /// where @Query satisfies wildcard expression <E> in a given @Section.
98 /// Returns zero if there is no blacklist entry corresponding to this
/// expression. @Category defaults to an empty StringRef when omitted.
100 unsigned inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query,
101 StringRef Category = StringRef()) const;
104 // Implementations of the create*() functions that can also be used by derived
// classes.
106 bool createInternal(const std::vector<std::string> &Paths,
// Memory-buffer overload of createInternal(): takes the buffer MB and a
// reference to the string Error. NOTE(review): the meaning of the bool result
// and when Error is written are not visible in this header -- confirm in the
// implementation file.
108 bool createInternal(const MemoryBuffer *MB, std::string &Error);
// Default construction is permitted; the copy constructor and the copy
// assignment operator are deleted, so a SpecialCaseList cannot be copied.
110 SpecialCaseList() = default;
111 SpecialCaseList(SpecialCaseList const &) = delete;
112 SpecialCaseList &operator=(SpecialCaseList const &) = delete;
114 /// Represents a set of regular expressions. Regular expressions which are
115 /// "literal" (i.e. no regex metacharacters) are stored in Strings. The
116 /// reason for doing so is efficiency; StringMap is much faster at matching
117 /// literal strings than Regex.
// Inserts Regexp, associated with LineNumber, into this set. NOTE(review):
// REError is presumably filled in when Regexp is rejected and the bool result
// presumably signals success -- confirm against the implementation.
120 bool insert(std::string Regexp, unsigned LineNumber, std::string &REError);
121 // Returns the line number in the source file that this query matches.
122 // Returns zero if no match is found.
123 unsigned match(StringRef Query) const;
// Storage for the expression set. "Literal" expressions are kept in Strings;
// RegExes pairs each Regex with an unsigned value. NOTE(review): the unsigned
// values are presumably source line numbers, and the role of Trigrams is not
// visible in this header -- confirm in the implementation.
126 StringMap<unsigned> Strings;
127 TrigramIndex Trigrams;
128 std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes;
// Two-level string-keyed map whose innermost values are Matchers.
// NOTE(review): the keys are presumably prefix, then category -- confirm.
131 using SectionEntries = StringMap<StringMap<Matcher>>;
// Takes ownership of the matcher M by moving it into SectionMatcher.
134 Section(std::unique_ptr<Matcher> M) : SectionMatcher(std::move(M)){};
// Matcher supplied at construction. NOTE(review): presumably matched against
// section names -- confirm.
136 std::unique_ptr<Matcher> SectionMatcher;
// Entries stored for this Section, as a SectionEntries map.
137 SectionEntries Entries;
// The Section objects held by this list, stored by value.
140 std::vector<Section> Sections;
142 /// Parses just-constructed SpecialCaseList entries from a memory buffer.
143 bool parse(const MemoryBuffer *MB, StringMap<size_t> &SectionsMap,
146 // Helper method for derived classes to search by Prefix, Query, and Category
147 // once they have already resolved a section entry. Unlike the overload that
// takes a section name, Entries is passed in directly and Category has no
// default value.
148 unsigned inSectionBlame(const SectionEntries &Entries, StringRef Prefix,
149 StringRef Query, StringRef Category) const;
154 #endif // LLVM_SUPPORT_SPECIALCASELIST_H