1 //===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief Recursive parser implementation for the matcher expression grammar.
13 //===----------------------------------------------------------------------===//
18 #include "clang/ASTMatchers/Dynamic/Parser.h"
19 #include "clang/ASTMatchers/Dynamic/Registry.h"
20 #include "clang/Basic/CharInfo.h"
21 #include "llvm/ADT/Twine.h"
24 namespace ast_matchers {
27 /// \brief Simple structure to hold information for one token from the parser.
///
/// The default constructor below initializes four members: \c Text, \c Kind,
/// \c Range and \c Value. Their declarations are not visible in this listing.
28 struct Parser::TokenInfo {
29 /// \brief Different possible tokens.
// NOTE(review): the enumerator list is elided from this listing. Kinds used
// elsewhere in this file: TK_Eof, TK_OpenParen, TK_CloseParen, TK_Comma,
// TK_Period, TK_Literal, TK_Ident, TK_InvalidChar and TK_Error.
// ID_Bind is only declared inside the struct; its value comes from the
// out-of-line definition that follows the struct.
42 /// \brief Some known identifiers.
43 static const char* const ID_Bind;
// A default-constructed token has kind TK_Eof; the other members are
// value-initialized.
45 TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {}
// Spelling of the identifier expected after '.' in a bind expression;
// parseMatcherExpressionImpl compares the identifier token's text against it.
53 const char* const Parser::TokenInfo::ID_Bind = "bind";
55 /// \brief Simple tokenizer for the parser.
///
/// Keeps one token of lookahead in \c NextToken: the constructor reads the
/// first token eagerly and consumeNextToken() reads the following one.
56 class Parser::CodeTokenizer {
/// \param MatcherCode Text to tokenize. \c Code and \c StartOfLine both start
///        out viewing all of it, and the line counter starts at 1.
/// \param Error Diagnostics object that receives the errors reported while
///        scanning unsigned and string literals.
58 explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
59 : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error) {
// Prime the one-token lookahead.
60 NextToken = getNextToken();
63 /// \brief Returns but doesn't consume the next token.
///
/// The result refers to the tokenizer's cached lookahead token, which
/// consumeNextToken() overwrites.
64 const TokenInfo &peekNextToken() const { return NextToken; }
66 /// \brief Consumes and returns the next token.
///
/// Copies the cached lookahead token, then refills the lookahead from the
/// remaining input.
67 TokenInfo consumeNextToken() {
// Copy first: the assignment below overwrites NextToken.
68 TokenInfo ThisToken = NextToken;
69 NextToken = getNextToken();
// NOTE(review): the return statement is elided from this listing;
// presumably ThisToken is returned.
/// \brief Returns the kind of the next token without consuming it.
73 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
/// \brief Scans one token from the front of \c Code.
///
/// The token's \c Range.Start and \c Range.End are both taken from
/// currentLocation(): the start before the token is scanned, the end after
/// its text has been removed from \c Code.
// NOTE(review): several lines of this function are elided from this listing
// (the declaration of Result, the conditions guarding each branch, the case
// labels for the punctuation and quote characters, and the return).
76 TokenInfo getNextToken() {
79 Result.Range.Start = currentLocation();
// End-of-input token. NOTE(review): the guarding condition is elided;
// presumably this is reached when no input is left.
82 Result.Kind = TokenInfo::TK_Eof;
// Single-character punctuation tokens: the text is the first character of
// Code, which is then dropped from the input.
89 Result.Kind = TokenInfo::TK_Comma;
90 Result.Text = Code.substr(0, 1);
91 Code = Code.drop_front();
94 Result.Kind = TokenInfo::TK_Period;
95 Result.Text = Code.substr(0, 1);
96 Code = Code.drop_front();
99 Result.Kind = TokenInfo::TK_OpenParen;
100 Result.Text = Code.substr(0, 1);
101 Code = Code.drop_front();
104 Result.Kind = TokenInfo::TK_CloseParen;
105 Result.Text = Code.substr(0, 1);
106 Code = Code.drop_front();
// String literals are delegated; the helper fills in Kind, Text and Value.
111 // Parse a string literal.
112 consumeStringLiteral(&Result);
// A leading decimal digit starts an unsigned literal.
115 case '0': case '1': case '2': case '3': case '4':
116 case '5': case '6': case '7': case '8': case '9':
117 // Parse an unsigned literal.
118 consumeUnsignedLiteral(&Result);
// Remaining characters: an alphanumeric character starts an identifier.
// Leading digits were already routed to consumeUnsignedLiteral by the case
// labels above.
122 if (isAlphanumeric(Code[0])) {
123 // Parse an identifier
124 size_t TokenLength = 1;
// Extend the identifier over the following alphanumeric characters.
// NOTE(review): the loop body is elided; presumably it increments
// TokenLength.
125 while (TokenLength < Code.size() && isAlphanumeric(Code[TokenLength]))
127 Result.Kind = TokenInfo::TK_Ident;
128 Result.Text = Code.substr(0, TokenLength);
129 Code = Code.drop_front(TokenLength);
// Otherwise the character becomes a one-character TK_InvalidChar token
// (NOTE(review): the 'else' introducing this branch is elided).
131 Result.Kind = TokenInfo::TK_InvalidChar;
132 Result.Text = Code.substr(0, 1);
133 Code = Code.drop_front(1);
// Code now starts just past the token, so this is the token's end.
138 Result.Range.End = currentLocation();
142 /// \brief Consume an unsigned literal.
///
/// On success \c Result becomes a TK_Literal whose \c Value holds the parsed
/// number. On failure an ET_ParserUnsignedError is reported for the scanned
/// text and \c Result becomes TK_Error. In both cases the scanned text is
/// stored in \c Result->Text and removed from \c Code.
///
/// \param Result Token to fill in; its \c Range.Start is read when building
///        the range of the error report.
// NOTE(review): the declarations of Length, Value and Range are elided from
// this listing.
143 void consumeUnsignedLiteral(TokenInfo *Result) {
145 if (Code.size() > 1) {
146 // Consume the 'x' or 'b' radix modifier, if present.
147 switch (toLowercase(Code[1])) {
148 case 'x': case 'b': Length = 2;
// Scanning accepts any hex digit whether or not an 'x' modifier was seen;
// the scanned text is validated by getAsInteger below.
// NOTE(review): the loop body is elided; presumably it increments Length.
151 while (Length < Code.size() && isHexDigit(Code[Length]))
154 Result->Text = Code.substr(0, Length);
155 Code = Code.drop_front(Length);
// StringRef::getAsInteger returns true on failure; radix 0 asks it to
// detect the radix from the literal's prefix.
158 if (!Result->Text.getAsInteger(0, Value)) {
159 Result->Kind = TokenInfo::TK_Literal;
160 Result->Value = Value;
// Failure path: report the whole scanned text, from the token's start to
// the current position.
163 Range.Start = Result->Range.Start;
164 Range.End = currentLocation();
165 Error->addError(Range, Error->ET_ParserUnsignedError) << Result->Text;
166 Result->Kind = TokenInfo::TK_Error;
170 /// \brief Consume a string literal.
172 /// \c Code must be positioned at the start of the literal (the opening
173 /// quote). Consumed until it finds the same closing quote character.
///
/// On success \c Result becomes a TK_Literal: \c Text covers the literal
/// including both quotes and \c Value holds the characters between them,
/// copied verbatim from the input. If no closing quote is found, all
/// remaining input is consumed, an ET_ParserStringError is reported and
/// \c Result becomes TK_Error.
///
/// \param Result Token to fill in; its \c Range.Start is read when building
///        the range of the error report.
174 void consumeStringLiteral(TokenInfo *Result) {
175 bool InEscape = false;
// The closing quote must be the same character as the opening one.
176 const char Marker = Code[0];
// Length is the index of the character being examined; index 0 is the
// opening quote.
177 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
// NOTE(review): the statements that set and clear InEscape are elided from
// this listing.
182 if (Code[Length] == '\\') {
186 if (Code[Length] == Marker) {
187 Result->Kind = TokenInfo::TK_Literal;
// Text keeps the surrounding quotes; Value drops them.
188 Result->Text = Code.substr(0, Length + 1);
189 Result->Value = Code.substr(1, Length - 1).str();
190 Code = Code.drop_front(Length + 1);
// No closing quote found: remember the unterminated text for the
// diagnostic, then consume everything that is left.
195 StringRef ErrorText = Code;
196 Code = Code.drop_front(Code.size());
// NOTE(review): the declaration of Range is elided from this listing.
198 Range.Start = Result->Range.Start;
199 Range.End = currentLocation();
200 Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
201 Result->Kind = TokenInfo::TK_Error;
204 /// \brief Consume all leading whitespace from \c Code.
///
/// Also maintains the position bookkeeping used by currentLocation():
/// after a newline, \c StartOfLine is moved to the character following it.
205 void consumeWhitespace() {
206 while (!Code.empty() && isWhitespace(Code[0])) {
207 if (Code[0] == '\n') {
// NOTE(review): one line of this branch is elided from this listing;
// presumably it advances the line counter.
209 StartOfLine = Code.drop_front();
// Drop the whitespace character itself.
211 Code = Code.drop_front();
/// \brief Returns the line/column position of the front of \c Code.
///
/// The column is 1-based and is computed as the distance between the start
/// of \c Code and the start of \c StartOfLine.
215 SourceLocation currentLocation() {
216 SourceLocation Location;
217 Location.Line = Line;
// Pointer difference: Code and StartOfLine are both derived from the same
// input string (see the constructor and consumeWhitespace).
218 Location.Column = Code.data() - StartOfLine.data() + 1;
// NOTE(review): the return statement is elided from this listing;
// presumably Location is returned.
// View of the input starting at the beginning of the current line; reset
// after each newline by consumeWhitespace() and used by currentLocation()
// to compute column numbers.
223 StringRef StartOfLine;
// Out-of-line definition of the Sema destructor; the body is empty.
// NOTE(review): presumably defined here rather than in the header so the
// class has a home translation unit -- confirm against Parser.h.
229 Parser::Sema::~Sema() {}
231 /// \brief Parse and validate a matcher expression.
///
/// Expects the next token to be the matcher name (an identifier), followed
/// by a parenthesized, comma-separated argument list and an optional
/// trailing .bind("id"). The parsed pieces are handed to
/// \c S->actOnMatcherExpression.
232 /// \return \c true on success, in which case \c Value has the matcher parsed.
233 /// If the input is malformed, or some argument has an error, it
234 /// returns \c false.
// NOTE(review): a number of lines are elided from this listing, including
// the declarations of EndToken and BindID, most 'return false' statements
// and the final assignment to *Value.
235 bool Parser::parseMatcherExpressionImpl(VariantValue *Value) {
// Callers must only get here when the lookahead is an identifier; this is
// asserted rather than diagnosed.
236 const TokenInfo NameToken = Tokenizer->consumeNextToken();
237 assert(NameToken.Kind == TokenInfo::TK_Ident);
238 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
239 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
240 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
245 std::vector<ParserValue> Args;
// Argument loop: runs until a ')' is consumed or the input is exhausted.
247 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
248 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
// Remember the ')' token; its range end is used for the matcher's range.
250 EndToken = Tokenizer->consumeNextToken();
// Every argument after the first must be preceded by a comma.
253 if (Args.size() > 0) {
254 // We must find a , token to continue.
255 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
256 if (CommaToken.Kind != TokenInfo::TK_Comma) {
257 Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
// Diagnostic context for errors raised while parsing this argument; the
// argument number passed is 1-based (Args.size() + 1).
263 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
264 NameToken.Text, NameToken.Range, Args.size() + 1);
265 ParserValue ArgValue;
// Record the text and range of the argument's first token before the
// recursive parse consumes it.
266 ArgValue.Text = Tokenizer->peekNextToken().Text;
267 ArgValue.Range = Tokenizer->peekNextToken().Range;
268 if (!parseExpressionImpl(&ArgValue.Value)) return false;
270 Args.push_back(ArgValue);
// The loop ended without consuming a ')'.
// NOTE(review): relies on EndToken still having kind TK_Eof, which is what
// a default-constructed TokenInfo has; its declaration is elided.
273 if (EndToken.Kind == TokenInfo::TK_Eof) {
274 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
279 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
280 // Parse .bind("foo")
281 Tokenizer->consumeNextToken(); // consume the period.
// All four tokens of bind("id") are consumed up front and validated
// afterwards, in order.
282 const TokenInfo BindToken = Tokenizer->consumeNextToken();
283 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
284 const TokenInfo IDToken = Tokenizer->consumeNextToken();
285 const TokenInfo CloseToken = Tokenizer->consumeNextToken();
287 // TODO: We could use different error codes for each/some to be more
288 // explicit about the syntax error.
// Each failed check reports ET_ParserMalformedBindExpr at the offending
// token's range.
289 if (BindToken.Kind != TokenInfo::TK_Ident ||
290 BindToken.Text != TokenInfo::ID_Bind) {
291 Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr);
294 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
295 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
// The bind ID must be a string literal, not a numeric one.
298 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
299 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
302 if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
303 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
306 BindID = IDToken.Value.getString();
// The matcher's range runs from the start of the name token to the end of
// the argument list's ')'; a trailing .bind(...) is not included.
309 // Merge the start and end infos.
310 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
311 NameToken.Text, NameToken.Range);
312 SourceRange MatcherRange = NameToken.Range;
313 MatcherRange.End = EndToken.Range.End;
314 VariantMatcher Result = S->actOnMatcherExpression(
315 NameToken.Text, MatcherRange, BindID, Args, Error);
// A null result from the semantic action is treated as failure.
316 if (Result.isNull()) return false;
322 /// \brief Parse an <Expression>
///
/// Dispatches on the kind of the next token: a literal is stored into
/// \c Value directly, an identifier starts a matcher expression, and every
/// other kind is an error.
// NOTE(review): the 'return' statements that end each case are elided from
// this listing.
323 bool Parser::parseExpressionImpl(VariantValue *Value) {
324 switch (Tokenizer->nextTokenKind()) {
325 case TokenInfo::TK_Literal:
326 *Value = Tokenizer->consumeNextToken().Value;
// The identifier is left unconsumed; parseMatcherExpressionImpl consumes
// it as the matcher name.
329 case TokenInfo::TK_Ident:
330 return parseMatcherExpressionImpl(Value);
// Input ended where an expression was expected.
332 case TokenInfo::TK_Eof:
333 Error->addError(Tokenizer->consumeNextToken().Range,
334 Error->ET_ParserNoCode);
337 case TokenInfo::TK_Error:
338 // This error was already reported by the tokenizer.
// Any other token cannot start an expression: consume it and report it
// together with its text.
341 case TokenInfo::TK_OpenParen:
342 case TokenInfo::TK_CloseParen:
343 case TokenInfo::TK_Comma:
344 case TokenInfo::TK_Period:
345 case TokenInfo::TK_InvalidChar:
346 const TokenInfo Token = Tokenizer->consumeNextToken();
347 Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text;
// Control is not expected to fall out of the switch.
351 llvm_unreachable("Unknown token kind.");
/// \brief Stores the tokenizer, semantic-action handler and diagnostics
/// sink that the parsing methods use.
// NOTE(review): the line declaring the last parameter is elided from this
// listing; the initializer list shows it is named Error.
354 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
356 : Tokenizer(Tokenizer), S(S), Error(Error) {}
/// \brief Sema implementation that builds matchers through the Registry.
358 class RegistrySema : public Parser::Sema {
360 virtual ~RegistrySema() {}
/// \brief Constructs the named matcher from the parsed arguments.
///
/// Forwards to Registry::constructMatcher when \c BindID is empty and to
/// Registry::constructBoundMatcher otherwise.
// NOTE(review): the line declaring the BindID parameter is elided from
// this listing.
361 VariantMatcher actOnMatcherExpression(StringRef MatcherName,
362 const SourceRange &NameRange,
364 ArrayRef<ParserValue> Args,
365 Diagnostics *Error) {
// No bind ID was given: build an unbound matcher.
366 if (BindID.empty()) {
367 return Registry::constructMatcher(MatcherName, NameRange, Args, Error);
// Otherwise the bind ID is passed along to the registry.
369 return Registry::constructBoundMatcher(MatcherName, NameRange, BindID,
/// \brief Convenience overload that forwards to the overload taking an
/// explicit Sema, passing the address of a local \c S.
// NOTE(review): the declaration of S is elided from this listing;
// presumably it is a RegistrySema.
375 bool Parser::parseExpression(StringRef Code, VariantValue *Value,
376 Diagnostics *Error) {
378 return parseExpression(Code, &S, Value, Error);
/// \brief Parses one complete expression from \c Code into \c Value.
///
/// \param Code  Text to parse.
/// \param S     Semantic-action handler passed to the Parser.
/// \param Value Receives the parsed value.
/// \param Error Receives diagnostics from the tokenizer and the parser.
/// \return \c false if parsing the expression fails. Input left over after
///         the expression is reported as ET_ParserTrailingCode.
// NOTE(review): the return statements following the trailing-code check
// are elided from this listing.
381 bool Parser::parseExpression(StringRef Code, Sema *S,
382 VariantValue *Value, Diagnostics *Error) {
383 CodeTokenizer Tokenizer(Code, Error);
384 if (!Parser(&Tokenizer, S, Error).parseExpressionImpl(Value)) return false;
// The expression must be followed by end of input.
385 if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
386 Error->addError(Tokenizer.peekNextToken().Range,
387 Error->ET_ParserTrailingCode);
/// \brief Convenience overload that forwards to the overload taking an
/// explicit Sema, passing the address of a local \c S.
// NOTE(review): the declaration of S is elided from this listing;
// presumably it is a RegistrySema.
393 llvm::Optional<DynTypedMatcher>
394 Parser::parseMatcherExpression(StringRef Code, Diagnostics *Error) {
396 return parseMatcherExpression(Code, &S, Error);
/// \brief Parses \c Code and extracts a single matcher from the result.
///
/// \param Code  Text to parse.
/// \param S     Semantic-action handler used while parsing.
/// \param Error Receives diagnostics.
/// \return An empty Optional if parsing fails or if the parsed value is not
///         a matcher (reported as ET_ParserNotAMatcher). If
///         getSingleMatcher() yields no value, ET_ParserOverloadedType is
///         reported with the value's type string.
// NOTE(review): the declaration of Value and the final return are elided
// from this listing.
399 llvm::Optional<DynTypedMatcher>
400 Parser::parseMatcherExpression(StringRef Code, Parser::Sema *S,
401 Diagnostics *Error) {
403 if (!parseExpression(Code, S, &Value, Error))
404 return llvm::Optional<DynTypedMatcher>();
// A literal parses successfully but is not a matcher.
405 if (!Value.isMatcher()) {
406 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
407 return llvm::Optional<DynTypedMatcher>();
409 llvm::Optional<DynTypedMatcher> Result =
410 Value.getMatcher().getSingleMatcher();
// No single matcher could be extracted from the variant.
411 if (!Result.hasValue()) {
412 Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
413 << Value.getTypeAsString();
418 } // namespace dynamic
419 } // namespace ast_matchers