1 //===- Parser.cpp - Matcher expression parser -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// Recursive parser implementation for the matcher expression grammar.
13 //===----------------------------------------------------------------------===//
15 #include "clang/ASTMatchers/Dynamic/Parser.h"
16 #include "clang/ASTMatchers/ASTMatchersInternal.h"
17 #include "clang/ASTMatchers/Dynamic/Diagnostics.h"
18 #include "clang/ASTMatchers/Dynamic/Registry.h"
19 #include "clang/Basic/CharInfo.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/ManagedStatic.h"
34 namespace ast_matchers {
37 /// Simple structure to hold information for one token from the parser.
38 struct Parser::TokenInfo {
39 /// Different possible tokens.
53 /// Some known identifiers.
// Identifier text that parseBindID() compares against to recognize a bind
// call; the value is defined out of line.
54 static const char* const ID_Bind;
56 TokenInfo() = default;
// A default-constructed token has the end-of-input kind.
59 TokenKind Kind = TK_Eof;
// Out-of-line definition of the identifier text that parseBindID() requires
// after the period of a bind expression.
64 const char* const Parser::TokenInfo::ID_Bind = "bind";
66 /// Simple tokenizer for the parser.
67 class Parser::CodeTokenizer {
/// Creates a tokenizer over \p MatcherCode that reports problems to
/// \p Error.
///
/// The first token is scanned immediately so that it is available for
/// peeking.
69 explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
70 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
71 NextToken = getNextToken();
/// Creates a tokenizer over \p MatcherCode with a code completion point.
///
/// The completion point is placed \p CodeCompletionOffset characters past
/// the start of \p MatcherCode. As in the other constructor, the first token
/// is scanned immediately.
74 CodeTokenizer(StringRef MatcherCode, Diagnostics *Error,
75 unsigned CodeCompletionOffset)
76 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
77 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
78 NextToken = getNextToken();
81 /// Returns but doesn't consume the next token.
///
/// The returned reference refers to the tokenizer's stored lookahead token.
82 const TokenInfo &peekNextToken() const { return NextToken; }
84 /// Consumes and returns the next token.
///
/// The current lookahead token is copied first, then the lookahead is
/// refilled by scanning the remaining input.
85 TokenInfo consumeNextToken() {
86 TokenInfo ThisToken = NextToken;
87 NextToken = getNextToken();
/// Returns the kind of the lookahead token without consuming it.
91 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
/// Scans one token from the front of \c Code.
///
/// The visible branches produce: a code completion token, end-of-input
/// tokens, the single-character punctuation tokens (comma, period, open and
/// close parenthesis), string and number literals, the words \c true and
/// \c false as literals, identifiers, and a one-character invalid token for
/// anything else. The token's range starts and ends at the locations
/// reported by currentLocation() before and after scanning.
94 TokenInfo getNextToken() {
97 Result.Range.Start = currentLocation();
// The completion point has been reached or passed: emit an empty completion
// token at that position and clear the location so this branch is not taken
// again.
99 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
100 Result.Kind = TokenInfo::TK_CodeCompletion;
101 Result.Text = StringRef(CodeCompletionLocation, 0);
102 CodeCompletionLocation = nullptr;
107 Result.Kind = TokenInfo::TK_Eof;
114 Result.Kind = TokenInfo::TK_Eof;
// Single-character punctuation: the token text is that one character, which
// is then dropped from the remaining input.
118 Result.Kind = TokenInfo::TK_Comma;
119 Result.Text = Code.substr(0, 1);
120 Code = Code.drop_front();
123 Result.Kind = TokenInfo::TK_Period;
124 Result.Text = Code.substr(0, 1);
125 Code = Code.drop_front();
128 Result.Kind = TokenInfo::TK_OpenParen;
129 Result.Text = Code.substr(0, 1);
130 Code = Code.drop_front();
133 Result.Kind = TokenInfo::TK_CloseParen;
134 Result.Text = Code.substr(0, 1);
135 Code = Code.drop_front();
140 // Parse a string literal.
141 consumeStringLiteral(&Result);
144 case '0': case '1': case '2': case '3': case '4':
145 case '5': case '6': case '7': case '8': case '9':
146 // Parse an unsigned or float literal.
147 consumeNumberLiteral(&Result);
151 if (isAlphanumeric(Code[0])) {
152 // Parse an identifier
153 size_t TokenLength = 1;
155 // A code completion location in/immediately after an identifier will
156 // cause the portion of the identifier before the code completion
157 // location to become a code completion token.
158 if (CodeCompletionLocation == Code.data() + TokenLength) {
159 CodeCompletionLocation = nullptr;
160 Result.Kind = TokenInfo::TK_CodeCompletion;
161 Result.Text = Code.substr(0, TokenLength);
162 Code = Code.drop_front(TokenLength);
// The identifier ends at the end of the input or at the first
// non-alphanumeric character.
165 if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
// The exact words "true" and "false" are tokenized as literals rather than
// identifiers.
169 if (TokenLength == 4 && Code.startswith("true")) {
170 Result.Kind = TokenInfo::TK_Literal;
172 } else if (TokenLength == 5 && Code.startswith("false")) {
173 Result.Kind = TokenInfo::TK_Literal;
174 Result.Value = false;
176 Result.Kind = TokenInfo::TK_Ident;
177 Result.Text = Code.substr(0, TokenLength);
179 Code = Code.drop_front(TokenLength);
// Anything else becomes a one-character invalid token.
181 Result.Kind = TokenInfo::TK_InvalidChar;
182 Result.Text = Code.substr(0, 1);
183 Code = Code.drop_front(1);
188 Result.Range.End = currentLocation();
192 /// Consume an unsigned or float literal.
///
/// The literal's spelling is stored in \c Result->Text and removed from
/// \c Code. When the spelling parses, \p Result becomes a TK_Literal holding
/// the value; otherwise ET_ParserNumberError is reported and \p Result
/// becomes TK_Error.
193 void consumeNumberLiteral(TokenInfo *Result) {
194 bool isFloatingLiteral = false;
196 if (Code.size() > 1) {
197 // Consume the 'x' or 'b' radix modifier, if present.
198 switch (toLowercase(Code[1])) {
199 case 'x': case 'b': Length = 2;
// Extend the literal over the following run of hexadecimal digits.
202 while (Length < Code.size() && isHexDigit(Code[Length]))
205 // Try to recognize a floating point literal.
206 while (Length < Code.size()) {
207 char c = Code[Length];
// A sign, a decimal point, or a further hex digit at this point marks the
// literal as floating point.
208 if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
209 isFloatingLiteral = true;
// Record the literal's spelling and advance the input past it.
216 Result->Text = Code.substr(0, Length);
217 Code = Code.drop_front(Length);
219 if (isFloatingLiteral) {
// Copy the spelling into a std::string so its c_str() can be handed to
// strtod.
222 std::string Text = Result->Text.str();
223 double doubleValue = strtod(Text.c_str(), &end);
// Accept only if strtod consumed the whole spelling and errno is clear.
224 if (*end == 0 && errno == 0) {
225 Result->Kind = TokenInfo::TK_Literal;
226 Result->Value = doubleValue;
// getAsInteger yields false when parsing succeeds; radix 0 asks it to pick
// the radix from the literal's prefix.
231 if (!Result->Text.getAsInteger(0, Value)) {
232 Result->Kind = TokenInfo::TK_Literal;
233 Result->Value = Value;
// Report a number error over the range from the token start to the current
// location and mark the token as an error.
239 Range.Start = Result->Range.Start;
240 Range.End = currentLocation();
241 Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
242 Result->Kind = TokenInfo::TK_Error;
245 /// Consume a string literal.
247 /// \c Code must be positioned at the start of the literal (the opening
248 /// quote). Consumes until it finds the same closing quote character.
///
/// If no closing quote is found, the rest of the input is consumed,
/// ET_ParserStringError is reported and \p Result becomes TK_Error.
249 void consumeStringLiteral(TokenInfo *Result) {
250 bool InEscape = false;
// Remember the opening quote so the matching closing quote can be found.
251 const char Marker = Code[0];
252 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
257 if (Code[Length] == '\\') {
261 if (Code[Length] == Marker) {
// Text covers the literal including both quotes; Value is the contents
// between them.
262 Result->Kind = TokenInfo::TK_Literal;
263 Result->Text = Code.substr(0, Length + 1);
264 Result->Value = Code.substr(1, Length - 1);
265 Code = Code.drop_front(Length + 1);
// Keep the remaining text for the error message, then consume all of it.
270 StringRef ErrorText = Code;
271 Code = Code.drop_front(Code.size());
273 Range.Start = Result->Range.Start;
274 Range.End = currentLocation();
275 Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
276 Result->Kind = TokenInfo::TK_Error;
279 /// Consume all leading whitespace from \c Code.
///
/// Passing a newline also moves \c StartOfLine to the character after it, so
/// that column numbers restart on the new line.
280 void consumeWhitespace() {
281 while (!Code.empty() && isWhitespace(Code[0])) {
282 if (Code[0] == '\n') {
// The new line begins just after the newline character.
284 StartOfLine = Code.drop_front();
286 Code = Code.drop_front();
/// Builds a line/column location for the current front of \c Code.
290 SourceLocation currentLocation() {
291 SourceLocation Location;
292 Location.Line = Line;
// The column is the 1-based offset of the current position from the start
// of the current line.
293 Location.Column = Code.data() - StartOfLine.data() + 1;
// Input beginning at the start of the current line; currentLocation() uses
// it to compute column numbers.
298 StringRef StartOfLine;
// Position in the input where code completion was requested. Null when no
// completion was requested or once the completion token has been emitted.
302 const char *CodeCompletionLocation = nullptr;
/// Out-of-line defaulted destructor.
305 Parser::Sema::~Sema() = default;
/// Sema-level query for the argument kinds accepted in \p Context, a list
/// of (matcher constructor, counter) pairs.
// NOTE(review): the body is not visible in this excerpt; confirm what the
// base implementation returns.
307 std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
308 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
/// Sema-level query for the matcher completions matching \p AcceptedTypes.
// NOTE(review): the body is not visible in this excerpt; confirm what the
// base implementation returns.
312 std::vector<MatcherCompletion>
313 Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
/// Scoped helper for the parser's context stack.
///
/// Construction pushes a (matcher constructor, counter) pair with the
/// counter at zero; destruction pops the last entry again.
317 struct Parser::ScopedContextEntry {
320 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
321 P->ContextStack.push_back(std::make_pair(C, 0u));
324 ~ScopedContextEntry() {
325 P->ContextStack.pop_back();
// Advance the counter of the innermost context entry.
329 ++P->ContextStack.back().second;
333 /// Parse expressions that start with an identifier.
335 /// This function can parse named values and matchers.
336 /// In case of failure it will try to determine the user's intent to give
337 /// an appropriate error message.
338 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
// Consume the identifier; the token that follows decides how it is
// interpreted.
339 const TokenInfo NameToken = Tokenizer->consumeNextToken();
341 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
342 // Parse as a named value.
343 if (const VariantValue NamedValue =
344 NamedValues ? NamedValues->lookup(NameToken.Text)
// Without a following period there is no bind suffix to parse.
347 if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) {
353 if (!parseBindID(BindID))
// A bind suffix was parsed, so the named value is expected to be a matcher.
356 assert(NamedValue.isMatcher());
// Apply the binding to the single underlying matcher, when one is available
// and it accepts the bind ID.
357 llvm::Optional<DynTypedMatcher> Result =
358 NamedValue.getMatcher().getSingleMatcher();
359 if (Result.hasValue()) {
360 llvm::Optional<DynTypedMatcher> Bound = Result->tryBind(BindID);
361 if (Bound.hasValue()) {
362 *Value = VariantMatcher::SingleMatcher(*Bound);
368 // If the syntax is correct and the name is not a matcher either, report
369 // unknown named value.
370 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
371 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
372 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
373 !S->lookupMatcherCtor(NameToken.Text)) {
374 Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
378 // Otherwise, fallback to the matcher parser.
381 // Parse as a matcher expression.
382 return parseMatcherExpressionImpl(NameToken, Value);
/// Parses the bind suffix of an expression and stores the ID in \p BindID.
///
/// The next token must be a period. The tokens after it must be the
/// identifier \c bind, an opening parenthesis, a string literal and a closing
/// parenthesis; the first token that does not fit is reported with
/// ET_ParserMalformedBindExpr. A code completion token directly after the
/// period adds a completion for \c bind instead.
// NOTE(review): the return statements are not visible in this excerpt;
// callers treat a false result as failure.
385 bool Parser::parseBindID(std::string &BindID) {
386 // Parse .bind("foo")
387 assert(Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period);
388 Tokenizer->consumeNextToken(); // consume the period.
389 const TokenInfo BindToken = Tokenizer->consumeNextToken();
390 if (BindToken.Kind == TokenInfo::TK_CodeCompletion) {
391 addCompletion(BindToken, MatcherCompletion("bind(\"", "bind", 1));
// The three remaining tokens are all consumed before any of them is
// validated.
395 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
396 const TokenInfo IDToken = Tokenizer->consumeNextToken();
397 const TokenInfo CloseToken = Tokenizer->consumeNextToken();
399 // TODO: We could use different error codes for each/some to be more
400 // explicit about the syntax error.
401 if (BindToken.Kind != TokenInfo::TK_Ident ||
402 BindToken.Text != TokenInfo::ID_Bind) {
403 Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr);
406 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
407 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
// The bind ID must be a literal token holding a string value.
410 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
411 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
414 if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
415 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
418 BindID = IDToken.Value.getString();
422 /// Parse and validate a matcher expression.
423 /// \return \c true on success, in which case \c Value has the matcher parsed.
424 /// If the input is malformed, or some argument has an error, it
425 /// returns \c false.
426 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
427 VariantValue *Value) {
428 assert(NameToken.Kind == TokenInfo::TK_Ident);
// The matcher name must be followed by an opening parenthesis.
429 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
430 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
431 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
// Resolve the matcher name to a constructor through the Sema object.
436 llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
439 Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
441 // Do not return here. We need to continue to give completion suggestions.
444 std::vector<ParserValue> Args;
// Keep this matcher on the context stack while this object is in scope; an
// unresolved matcher is recorded as a null constructor.
448 ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr);
// Parse arguments until a closing parenthesis or the end of the input.
450 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
451 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
453 EndToken = Tokenizer->consumeNextToken();
457 // We must find a , token to continue.
458 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
459 if (CommaToken.Kind != TokenInfo::TK_Comma) {
460 Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
// Attach matcher-argument context to the diagnostics object while this
// argument is parsed.
466 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
467 NameToken.Text, NameToken.Range,
// Record the text and range of the argument's first token before parsing it.
469 ParserValue ArgValue;
470 ArgValue.Text = Tokenizer->peekNextToken().Text;
471 ArgValue.Range = Tokenizer->peekNextToken().Range;
472 if (!parseExpressionImpl(&ArgValue.Value)) {
476 Args.push_back(ArgValue);
// The end token is still end-of-input, so no closing parenthesis was
// consumed; the error points at the opening one.
481 if (EndToken.Kind == TokenInfo::TK_Eof) {
482 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
// Optional bind suffix after the closing parenthesis.
487 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
488 if (!parseBindID(BindID))
495 // Merge the start and end infos.
496 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
497 NameToken.Text, NameToken.Range);
// The matcher's range runs from the start of the name to the end of the
// closing token.
498 SourceRange MatcherRange = NameToken.Range;
499 MatcherRange.End = EndToken.Range.End;
500 VariantMatcher Result = S->actOnMatcherExpression(
501 *Ctor, MatcherRange, BindID, Args, Error);
502 if (Result.isNull()) return false;
508 // If the prefix of this completion matches the completion token, add it to
509 // Completions minus the prefix.
//
// A completion is added only when its typed text starts with the text of
// CompToken and its specificity is greater than zero. The stored typed text
// is the remainder after that prefix; the declaration and specificity are
// copied unchanged.
510 void Parser::addCompletion(const TokenInfo &CompToken,
511 const MatcherCompletion& Completion) {
512 if (StringRef(Completion.TypedText).startswith(CompToken.Text) &&
513 Completion.Specificity > 0) {
514 Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
515 Completion.MatcherDecl, Completion.Specificity);
/// Collects a completion for every named value that is convertible to one
/// of \p AcceptedTypes.
///
/// Returns an empty list when the parser has no named values.
519 std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
520 ArrayRef<ArgKind> AcceptedTypes) {
521 if (!NamedValues) return std::vector<MatcherCompletion>();
522 std::vector<MatcherCompletion> Result;
523 for (const auto &Entry : *NamedValues) {
// Receives the specificity reported by the convertibility check.
524 unsigned Specificity;
525 if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
// The declaration text is the value's type string, a space, and the name.
527 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
528 Result.emplace_back(Entry.getKey(), Decl, Specificity);
/// Consumes the code completion token and adds the completions that fit the
/// current context stack.
///
/// Candidates come from the Sema object's matcher completions and from the
/// named values; each is filtered through addCompletion().
534 void Parser::addExpressionCompletions() {
535 const TokenInfo CompToken = Tokenizer->consumeNextToken();
536 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
538 // We cannot complete code if there is an invalid element on the context
540 for (ContextStackTy::iterator I = ContextStack.begin(),
541 E = ContextStack.end();
// Ask the Sema object which argument kinds are acceptable here, then offer
// matchers and named values of those kinds.
547 auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
548 for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
549 addCompletion(CompToken, Completion);
552 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
553 addCompletion(CompToken, Completion);
557 /// Parse an <Expression>
///
/// Dispatches on the kind of the next token: a literal token's value is
/// stored in \p Value, an identifier is handed to
/// parseIdentifierPrefixImpl(), a code completion token triggers
/// addExpressionCompletions(), end of input is reported as
/// ET_ParserNoCode, and punctuation or invalid characters are reported as
/// ET_ParserInvalidToken.
558 bool Parser::parseExpressionImpl(VariantValue *Value) {
559 switch (Tokenizer->nextTokenKind()) {
560 case TokenInfo::TK_Literal:
561 *Value = Tokenizer->consumeNextToken().Value;
564 case TokenInfo::TK_Ident:
565 return parseIdentifierPrefixImpl(Value);
567 case TokenInfo::TK_CodeCompletion:
568 addExpressionCompletions();
571 case TokenInfo::TK_Eof:
572 Error->addError(Tokenizer->consumeNextToken().Range,
573 Error->ET_ParserNoCode);
576 case TokenInfo::TK_Error:
577 // This error was already reported by the tokenizer.
// None of the following token kinds can start an expression; the offending
// token is consumed and named in the error.
580 case TokenInfo::TK_OpenParen:
581 case TokenInfo::TK_CloseParen:
582 case TokenInfo::TK_Comma:
583 case TokenInfo::TK_Period:
584 case TokenInfo::TK_InvalidChar:
585 const TokenInfo Token = Tokenizer->consumeNextToken();
586 Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text;
590 llvm_unreachable("Unknown token kind.");
// Sema instance the Parser constructor falls back to when it is given a
// null Sema pointer.
593 static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
/// Constructs a parser that reads tokens from \p Tokenizer, looks up named
/// values in \p NamedValues and reports problems to \p Error.
///
/// A null \p S selects the file-level default RegistrySema.
595 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
596 const NamedValueMap *NamedValues, Diagnostics *Error)
597 : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
598 NamedValues(NamedValues), Error(Error) {}
/// Out-of-line defaulted destructor.
600 Parser::RegistrySema::~RegistrySema() = default;
/// Looks up \p MatcherName by forwarding to Registry::lookupMatcherCtor().
602 llvm::Optional<MatcherCtor>
603 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
604 return Registry::lookupMatcherCtor(MatcherName);
/// Builds the matcher for \p Ctor with \p Args through the Registry.
///
/// An empty \p BindID constructs a plain matcher; a non-empty one constructs
/// a matcher bound to that ID.
607 VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
608 MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
609 ArrayRef<ParserValue> Args, Diagnostics *Error) {
610 if (BindID.empty()) {
611 return Registry::constructMatcher(Ctor, NameRange, Args, Error);
613 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
/// Forwards \p Context to Registry::getAcceptedCompletionTypes().
618 std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
619 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
620 return Registry::getAcceptedCompletionTypes(Context);
/// Forwards \p AcceptedTypes to Registry::getMatcherCompletions().
623 std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
624 ArrayRef<ArgKind> AcceptedTypes) {
625 return Registry::getMatcherCompletions(AcceptedTypes);
/// Parses \p Code as a single expression and stores the result in \p Value.
///
/// A fresh tokenizer and parser are created for the call. Any token left
/// after the expression is reported as ET_ParserTrailingCode.
628 bool Parser::parseExpression(StringRef Code, Sema *S,
629 const NamedValueMap *NamedValues,
630 VariantValue *Value, Diagnostics *Error) {
631 CodeTokenizer Tokenizer(Code, Error);
632 if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
// The whole input must have been consumed by the expression.
634 if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
635 Error->addError(Tokenizer.peekNextToken().Range,
636 Error->ET_ParserTrailingCode);
/// Computes the completions available at \p CompletionOffset in \p Code.
///
/// The code is parsed with a tokenizer that carries the completion offset;
/// the return value of the parse is ignored and only the completions
/// collected by the parser are returned, sorted.
642 std::vector<MatcherCompletion>
643 Parser::completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S,
644 const NamedValueMap *NamedValues) {
646 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
647 Parser P(&Tokenizer, S, NamedValues, &Error);
649 P.parseExpressionImpl(&Dummy);
651 // Sort by specificity, then by name.
// Higher specificity sorts first; equal specificity is ordered by ascending
// typed text.
652 llvm::sort(P.Completions,
653 [](const MatcherCompletion &A, const MatcherCompletion &B) {
654 if (A.Specificity != B.Specificity)
655 return A.Specificity > B.Specificity;
656 return A.TypedText < B.TypedText;
659 return P.Completions;
/// Parses \p Code and requires the result to be a single matcher.
///
/// Returns an empty Optional when parsing fails or when the parsed value is
/// not a matcher (reported as ET_ParserNotAMatcher). A matcher from which no
/// single matcher can be obtained is reported as ET_ParserOverloadedType
/// together with its type string.
662 llvm::Optional<DynTypedMatcher>
663 Parser::parseMatcherExpression(StringRef Code, Sema *S,
664 const NamedValueMap *NamedValues,
665 Diagnostics *Error) {
667 if (!parseExpression(Code, S, NamedValues, &Value, Error))
668 return llvm::Optional<DynTypedMatcher>();
669 if (!Value.isMatcher()) {
670 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
671 return llvm::Optional<DynTypedMatcher>();
// Extract the single underlying matcher, if there is one.
673 llvm::Optional<DynTypedMatcher> Result =
674 Value.getMatcher().getSingleMatcher();
675 if (!Result.hasValue()) {
676 Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
677 << Value.getTypeAsString();
682 } // namespace dynamic
683 } // namespace ast_matchers