1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a glob pattern matcher.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Support/GlobPattern.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/Optional.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/Support/Errc.h"
22 static bool hasWildcard(StringRef S) {
23 return S.find_first_of("?*[") != StringRef::npos;
26 // Expands character ranges and returns a bitmap.
27 // For example, "a-cf-hz" is expanded to "abcfghz".
28 static Expected<BitVector> expand(StringRef S, StringRef Original) {
29 BitVector BV(256, false);
39 // If it doesn't start with something like X-Y,
40 // consume the first character and proceed.
47 // It must be in the form of X-Y.
48 // Validate it and then interpret the range.
50 return make_error<StringError>("invalid glob pattern: " + Original,
51 errc::invalid_argument);
53 for (int C = Start; C <= End; ++C)
54 BV[(uint8_t)C] = true;
59 BV[(uint8_t)C] = true;
63 // This is a scanner for the glob pattern.
64 // A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
65 // (which is a negative form of "[<chars>]"), or a non-meta character.
66 // This function returns the first token in S.
67 static Expected<BitVector> scan(StringRef &S, StringRef Original) {
71 // '*' is represented by an empty bitvector.
72 // All other bitvectors are 256-bit long.
76 return BitVector(256, true);
78 size_t End = S.find(']', 1);
79 if (End == StringRef::npos)
80 return make_error<StringError>("invalid glob pattern: " + Original,
81 errc::invalid_argument);
83 StringRef Chars = S.substr(1, End - 1);
84 S = S.substr(End + 1);
85 if (Chars.startswith("^")) {
86 Expected<BitVector> BV = expand(Chars.substr(1), Original);
88 return BV.takeError();
91 return expand(Chars, Original);
94 BitVector BV(256, false);
95 BV[(uint8_t)S[0]] = true;
101 Expected<GlobPattern> GlobPattern::create(StringRef S) {
104 // S doesn't contain any metacharacter,
105 // so the regular string comparison should work.
106 if (!hasWildcard(S)) {
111 // S is something like "foo*". We can use startswith().
112 if (S.endswith("*") && !hasWildcard(S.drop_back())) {
113 Pat.Prefix = S.drop_back();
117 // S is something like "*foo". We can use endswith().
118 if (S.startswith("*") && !hasWildcard(S.drop_front())) {
119 Pat.Suffix = S.drop_front();
123 // Otherwise, we need to do real glob pattern matching.
124 // Parse the pattern now.
125 StringRef Original = S;
127 Expected<BitVector> BV = scan(S, Original);
129 return BV.takeError();
130 Pat.Tokens.push_back(*BV);
135 bool GlobPattern::match(StringRef S) const {
139 return S.startswith(*Prefix);
141 return S.endswith(*Suffix);
142 return matchOne(Tokens, S);
145 // Runs glob pattern Pats against string S.
146 bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
151 // If Pats[0] is '*', try to match Pats[1..] against all possible
152 // tail strings of S to see at least one pattern succeeds.
153 if (Pats[0].size() == 0) {
154 Pats = Pats.slice(1);
156 // Fast path. If a pattern is '*', it matches anything.
158 for (size_t I = 0, E = S.size(); I < E; ++I)
159 if (matchOne(Pats, S.substr(I)))
164 // If Pats[0] is not '*', it must consume one character.
165 if (S.empty() || !Pats[0][(uint8_t)S[0]])
167 Pats = Pats.slice(1);