1 //===- Parser.cpp - Matcher expression parser -----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Recursive parser implementation for the matcher expression grammar.
12 //===----------------------------------------------------------------------===//
14 #include "clang/ASTMatchers/Dynamic/Parser.h"
15 #include "clang/ASTMatchers/ASTMatchersInternal.h"
16 #include "clang/ASTMatchers/Dynamic/Diagnostics.h"
17 #include "clang/ASTMatchers/Dynamic/Registry.h"
18 #include "clang/Basic/CharInfo.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/ManagedStatic.h"
33 namespace ast_matchers {
36 /// Simple structure to hold information for one token from the parser.
37 struct Parser::TokenInfo {
38 /// Different possible tokens.
52 /// Some known identifiers.
/// Spelling of the identifier that parseBindID() compares a token's text
/// against.
53 static const char* const ID_Bind;
55 TokenInfo() = default;
/// Kind of this token; a default-constructed token is TK_Eof.
58 TokenKind Kind = TK_Eof;
/// Out-of-line definition of the identifier text expected after the period
/// in a bind expression.
63 const char* const Parser::TokenInfo::ID_Bind = "bind";
65 /// Simple tokenizer for the parser.
///
/// The unconsumed input is held in \c Code and one token of lookahead is kept
/// in \c NextToken: both constructors read the first token eagerly, and
/// consumeNextToken() refills the lookahead each time it is called.
66 class Parser::CodeTokenizer {
/// Tokenizes \p MatcherCode with no code completion location set.
68 explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
69 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
// Prime the one-token lookahead.
70 NextToken = getNextToken();
/// Tokenizes \p MatcherCode and places the code completion location
/// \p CodeCompletionOffset chars past the start of \p MatcherCode.
73 CodeTokenizer(StringRef MatcherCode, Diagnostics *Error,
74 unsigned CodeCompletionOffset)
75 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
76 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
// Prime the one-token lookahead.
77 NextToken = getNextToken();
80 /// Returns but doesn't consume the next token.
81 const TokenInfo &peekNextToken() const { return NextToken; }
83 /// Consumes and returns the next token.
84 TokenInfo consumeNextToken() {
// Copy the current lookahead before it is overwritten by the refill below.
85 TokenInfo ThisToken = NextToken;
86 NextToken = getNextToken();
/// Returns the kind of the lookahead token without consuming it.
90 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
/// Lexes one token from the front of \c Code, recording its start and end
/// locations in the token's Range.
93 TokenInfo getNextToken() {
96 Result.Range.Start = currentLocation();
// The input position has reached (or passed) the completion location: emit
// an empty-text completion token and clear the location so this branch is
// not taken again.
98 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
99 Result.Kind = TokenInfo::TK_CodeCompletion;
100 Result.Text = StringRef(CodeCompletionLocation, 0);
101 CodeCompletionLocation = nullptr;
106 Result.Kind = TokenInfo::TK_Eof;
113 Result.Kind = TokenInfo::TK_Eof;
// Single-character punctuation tokens: the token text is the one character
// at the front of Code, which is then dropped from the input.
117 Result.Kind = TokenInfo::TK_Comma;
118 Result.Text = Code.substr(0, 1);
119 Code = Code.drop_front();
122 Result.Kind = TokenInfo::TK_Period;
123 Result.Text = Code.substr(0, 1);
124 Code = Code.drop_front();
127 Result.Kind = TokenInfo::TK_OpenParen;
128 Result.Text = Code.substr(0, 1);
129 Code = Code.drop_front();
132 Result.Kind = TokenInfo::TK_CloseParen;
133 Result.Text = Code.substr(0, 1);
134 Code = Code.drop_front();
139 // Parse a string literal.
140 consumeStringLiteral(&Result);
143 case '0': case '1': case '2': case '3': case '4':
144 case '5': case '6': case '7': case '8': case '9':
145 // Parse an unsigned integer or floating-point literal.
146 consumeNumberLiteral(&Result);
150 if (isAlphanumeric(Code[0])) {
151 // Parse an identifier
152 size_t TokenLength = 1;
154 // A code completion location in/immediately after an identifier will
155 // cause the portion of the identifier before the code completion
156 // location to become a code completion token.
157 if (CodeCompletionLocation == Code.data() + TokenLength) {
158 CodeCompletionLocation = nullptr;
159 Result.Kind = TokenInfo::TK_CodeCompletion;
160 Result.Text = Code.substr(0, TokenLength);
161 Code = Code.drop_front(TokenLength);
// True once the identifier has ended: either the input is exhausted or the
// next character is not alphanumeric.
164 if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
// The exact spellings "true" and "false" are literals; any other
// alphanumeric run is an identifier whose text is kept in Result.Text.
168 if (TokenLength == 4 && Code.startswith("true")) {
169 Result.Kind = TokenInfo::TK_Literal;
171 } else if (TokenLength == 5 && Code.startswith("false")) {
172 Result.Kind = TokenInfo::TK_Literal;
173 Result.Value = false;
175 Result.Kind = TokenInfo::TK_Ident;
176 Result.Text = Code.substr(0, TokenLength);
178 Code = Code.drop_front(TokenLength);
// Not alphanumeric: consume the single offending character as an
// invalid-character token.
180 Result.Kind = TokenInfo::TK_InvalidChar;
181 Result.Text = Code.substr(0, 1);
182 Code = Code.drop_front(1);
187 Result.Range.End = currentLocation();
191 /// Consume an unsigned integer or floating-point literal.
///
/// The token text is removed from \c Code. When the text converts cleanly,
/// \p Result becomes a TK_Literal holding the converted value; the error path
/// at the end reports ET_ParserNumberError and marks \p Result as TK_Error.
192 void consumeNumberLiteral(TokenInfo *Result) {
193 bool isFloatingLiteral = false;
195 if (Code.size() > 1) {
196 // Consume the 'x' or 'b' radix modifier, if present.
197 switch (toLowercase(Code[1])) {
198 case 'x': case 'b': Length = 2;
// Extend the token over every following hex-digit character.
201 while (Length < Code.size() && isHexDigit(Code[Length]))
204 // Try to recognize a floating point literal.
205 while (Length < Code.size()) {
206 char c = Code[Length];
207 if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
208 isFloatingLiteral = true;
215 Result->Text = Code.substr(0, Length);
216 Code = Code.drop_front(Length);
218 if (isFloatingLiteral) {
// strtod takes a C string, so the token text is copied into a std::string
// to obtain one via c_str().
221 std::string Text = Result->Text.str();
222 double doubleValue = strtod(Text.c_str(), &end);
// Accept only if strtod stopped at the terminating NUL (the whole text was
// converted) and errno is zero.
223 if (*end == 0 && errno == 0) {
224 Result->Kind = TokenInfo::TK_Literal;
225 Result->Value = doubleValue;
// A false result from getAsInteger is treated as a successful conversion.
230 if (!Result->Text.getAsInteger(0, Value)) {
231 Result->Kind = TokenInfo::TK_Literal;
232 Result->Value = Value;
// Error path: report the range from the token start to the current position,
// with the offending text attached to the diagnostic.
238 Range.Start = Result->Range.Start;
239 Range.End = currentLocation();
240 Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
241 Result->Kind = TokenInfo::TK_Error;
244 /// Consume a string literal.
246 /// \c Code must be positioned at the start of the literal (the opening
247 /// quote). Consumed until it finds the same closing quote character.
///
/// When the closing quote is found, the token text includes both quote
/// characters while the token value holds only the characters between them.
248 void consumeStringLiteral(TokenInfo *Result) {
249 bool InEscape = false;
// The closing quote must be the same character that opened the literal.
250 const char Marker = Code[0];
251 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
256 if (Code[Length] == '\\') {
260 if (Code[Length] == Marker) {
261 Result->Kind = TokenInfo::TK_Literal;
262 Result->Text = Code.substr(0, Length + 1);
263 Result->Value = Code.substr(1, Length - 1);
264 Code = Code.drop_front(Length + 1);
// Error path: consume all remaining input and report it as the text of a
// string error spanning from the token start to the end of the input.
269 StringRef ErrorText = Code;
270 Code = Code.drop_front(Code.size());
272 Range.Start = Result->Range.Start;
273 Range.End = currentLocation();
274 Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
275 Result->Kind = TokenInfo::TK_Error;
278 /// Consume all leading whitespace from \c Code.
279 void consumeWhitespace() {
280 while (!Code.empty() && isWhitespace(Code[0])) {
281 if (Code[0] == '\n') {
// The new line begins right after the newline character; currentLocation()
// computes columns relative to StartOfLine.
283 StartOfLine = Code.drop_front();
285 Code = Code.drop_front();
/// Returns the line and column of the current front of \c Code.
289 SourceLocation currentLocation() {
290 SourceLocation Location;
291 Location.Line = Line;
// Columns are 1-based offsets from the start of the current line.
292 Location.Column = Code.data() - StartOfLine.data() + 1;
/// Input starting at the beginning of the line currently being tokenized.
297 StringRef StartOfLine;
/// Position in the input at which a code completion token is emitted, or
/// null when there is none (or it has already been emitted).
301 const char *CodeCompletionLocation = nullptr;
/// Out-of-line defaulted destructor for the Sema interface.
304 Parser::Sema::~Sema() = default;
/// Sema hook that takes the stack of (matcher constructor, argument index)
/// pairs and returns a list of argument kinds. addExpressionCompletions()
/// passes the parser's ContextStack here and uses the result to select
/// completions.
306 std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
307 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
/// Sema hook that takes a list of accepted argument kinds and returns matcher
/// completions. addExpressionCompletions() feeds each returned completion to
/// addCompletion().
311 std::vector<MatcherCompletion>
312 Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
/// Scope guard for the parser's context stack: construction pushes an entry
/// for a matcher constructor and destruction pops the last entry again.
316 struct Parser::ScopedContextEntry {
// The new entry pairs the constructor with a counter that starts at zero.
319 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
320 P->ContextStack.push_back(std::make_pair(C, 0u));
323 ~ScopedContextEntry() {
324 P->ContextStack.pop_back();
// Advance the counter stored in the top-most context entry.
328 ++P->ContextStack.back().second;
332 /// Parse expressions that start with an identifier.
334 /// This function can parse named values and matchers.
335 /// In case of failure it will try to determine the user's intent to give
336 /// an appropriate error message.
337 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
338 const TokenInfo NameToken = Tokenizer->consumeNextToken();
// An identifier that is not followed by '(' is first tried as a named value.
340 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
341 // Parse as a named value.
// The lookup is only attempted when a named-value map was supplied.
342 if (const VariantValue NamedValue =
343 NamedValues ? NamedValues->lookup(NameToken.Text)
346 if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) {
// A period follows the named value, so parse the bind suffix.
352 if (!parseBindID(BindID))
355 assert(NamedValue.isMatcher());
// Binding needs one concrete matcher: both extracting the single matcher and
// binding it yield an Optional that is checked before the result is stored.
356 llvm::Optional<DynTypedMatcher> Result =
357 NamedValue.getMatcher().getSingleMatcher();
358 if (Result.hasValue()) {
359 llvm::Optional<DynTypedMatcher> Bound = Result->tryBind(BindID);
360 if (Bound.hasValue()) {
361 *Value = VariantMatcher::SingleMatcher(*Bound);
367 // If the syntax is correct and the name is not a matcher either, report
368 // unknown named value.
369 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
370 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
371 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
372 !S->lookupMatcherCtor(NameToken.Text)) {
373 Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
377 // Otherwise, fallback to the matcher parser.
380 // Parse as a matcher expression.
381 return parseMatcherExpressionImpl(NameToken, Value);
/// Parse the bind suffix of a matcher: a period, the identifier "bind", an
/// open paren, a string literal and a close paren.
///
/// The next token must be the period (asserted). Each piece that does not
/// have the expected shape is reported as ET_ParserMalformedBindExpr at that
/// token's range. On the success path the string literal's value is stored in
/// \p BindID. Callers treat a false result as failure.
384 bool Parser::parseBindID(std::string &BindID) {
385 // Parse .bind("foo")
386 assert(Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period);
387 Tokenizer->consumeNextToken(); // consume the period.
388 const TokenInfo BindToken = Tokenizer->consumeNextToken();
// A completion token right after the period: offer the `bind("` completion.
389 if (BindToken.Kind == TokenInfo::TK_CodeCompletion) {
390 addCompletion(BindToken, MatcherCompletion("bind(\"", "bind", 1));
// Consume the remaining three tokens up front; they are validated below.
394 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
395 const TokenInfo IDToken = Tokenizer->consumeNextToken();
396 const TokenInfo CloseToken = Tokenizer->consumeNextToken();
398 // TODO: We could use different error codes for each/some to be more
399 // explicit about the syntax error.
400 if (BindToken.Kind != TokenInfo::TK_Ident ||
401 BindToken.Text != TokenInfo::ID_Bind) {
402 Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr);
405 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
406 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
// The bind ID must be a literal token whose value is a string.
409 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
410 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
413 if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
414 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
417 BindID = IDToken.Value.getString();
421 /// Parse and validate a matcher expression.
///
/// \p NameToken is the already-consumed matcher name and must be an
/// identifier token (asserted).
422 /// \return \c true on success, in which case \c Value has the matcher parsed.
423 /// If the input is malformed, or some argument has an error, it
424 /// returns \c false.
425 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
426 VariantValue *Value) {
427 assert(NameToken.Kind == TokenInfo::TK_Ident);
428 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
429 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
430 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
435 llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
438 Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
440 // Do not return here. We need to continue to give completion suggestions.
443 std::vector<ParserValue> Args;
// Register this matcher on the context stack for the duration of argument
// parsing; a failed lookup is recorded as a null constructor.
447 ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr);
// Parse arguments until the closing paren or the end of the input.
449 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
450 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
452 EndToken = Tokenizer->consumeNextToken();
456 // We must find a , token to continue.
457 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
458 if (CommaToken.Kind != TokenInfo::TK_Comma) {
459 Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
465 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
466 NameToken.Text, NameToken.Range,
// Record the text and range of the argument's first token before parsing the
// argument expression itself.
468 ParserValue ArgValue;
469 ArgValue.Text = Tokenizer->peekNextToken().Text;
470 ArgValue.Range = Tokenizer->peekNextToken().Range;
471 if (!parseExpressionImpl(&ArgValue.Value)) {
475 Args.push_back(ArgValue);
// The input ended without a closing paren; the error is reported at the
// opening paren's range.
480 if (EndToken.Kind == TokenInfo::TK_Eof) {
481 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
// A period after the closing paren introduces a bind suffix.
486 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
487 if (!parseBindID(BindID))
494 // Merge the start and end infos.
495 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
496 NameToken.Text, NameToken.Range);
// The matcher's range runs from the start of the name to the end of the
// closing token.
497 SourceRange MatcherRange = NameToken.Range;
498 MatcherRange.End = EndToken.Range.End;
499 VariantMatcher Result = S->actOnMatcherExpression(
500 *Ctor, MatcherRange, BindID, Args, Error);
// A null result from the Sema means the matcher could not be constructed.
501 if (Result.isNull()) return false;
507 // If the prefix of this completion matches the completion token, add it to
508 // Completions minus the prefix.
// Completions whose Specificity is zero are never added.
509 void Parser::addCompletion(const TokenInfo &CompToken,
510 const MatcherCompletion& Completion) {
511 if (StringRef(Completion.TypedText).startswith(CompToken.Text) &&
512 Completion.Specificity > 0) {
// Only the part the user has not typed yet is stored as the typed text.
513 Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
514 Completion.MatcherDecl, Completion.Specificity);
/// Builds completions from the named values: one entry for every named value
/// that is convertible to one of \p AcceptedTypes. Returns an empty list when
/// the parser has no named-value map.
518 std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
519 ArrayRef<ArgKind> AcceptedTypes) {
520 if (!NamedValues) return std::vector<MatcherCompletion>();
521 std::vector<MatcherCompletion> Result;
522 for (const auto &Entry : *NamedValues) {
// Passed to isConvertibleTo as an out-parameter and then stored in the
// completion.
523 unsigned Specificity;
524 if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
// The declaration text is the value's type string, a space, and its name.
526 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
527 Result.emplace_back(Entry.getKey(), Decl, Specificity);
/// Consumes the code completion token (asserted to be next) and offers
/// completions for it, drawn from the Sema's matcher completions and from the
/// named values accepted in the current context.
533 void Parser::addExpressionCompletions() {
534 const TokenInfo CompToken = Tokenizer->consumeNextToken();
535 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
537 // We cannot complete code if there is an invalid element on the context
539 for (ContextStackTy::iterator I = ContextStack.begin(),
540 E = ContextStack.end();
// Ask the Sema which argument kinds fit the current context, then offer every
// matcher and named value of those kinds; addCompletion() filters them
// against the completion token's text.
546 auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
547 for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
548 addCompletion(CompToken, Completion);
551 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
552 addCompletion(CompToken, Completion);
556 /// Parse an <Expression>
///
/// Dispatches on the kind of the next token: a literal's value is stored in
/// \p Value, an identifier is handed to parseIdentifierPrefixImpl(), a
/// completion token triggers addExpressionCompletions(), and the remaining
/// kinds are error cases.
557 bool Parser::parseExpressionImpl(VariantValue *Value) {
558 switch (Tokenizer->nextTokenKind()) {
559 case TokenInfo::TK_Literal:
560 *Value = Tokenizer->consumeNextToken().Value;
563 case TokenInfo::TK_Ident:
564 return parseIdentifierPrefixImpl(Value);
566 case TokenInfo::TK_CodeCompletion:
567 addExpressionCompletions();
// End of input where an expression was expected.
570 case TokenInfo::TK_Eof:
571 Error->addError(Tokenizer->consumeNextToken().Range,
572 Error->ET_ParserNoCode);
575 case TokenInfo::TK_Error:
576 // This error was already reported by the tokenizer.
// None of these tokens can start an expression: consume the token and report
// it with its text.
579 case TokenInfo::TK_OpenParen:
580 case TokenInfo::TK_CloseParen:
581 case TokenInfo::TK_Comma:
582 case TokenInfo::TK_Period:
583 case TokenInfo::TK_InvalidChar:
584 const TokenInfo Token = Tokenizer->consumeNextToken();
585 Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text;
589 llvm_unreachable("Unknown token kind.");
/// Registry-backed Sema that the Parser constructor falls back to when it is
/// given a null Sema pointer.
592 static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
/// Stores the collaborators used while parsing. A null \p S selects the
/// file-level default RegistrySema instance.
594 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
595 const NamedValueMap *NamedValues, Diagnostics *Error)
596 : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
597 NamedValues(NamedValues), Error(Error) {}
/// Out-of-line defaulted destructor for RegistrySema.
599 Parser::RegistrySema::~RegistrySema() = default;
/// Looks up \p MatcherName by forwarding to Registry::lookupMatcherCtor.
601 llvm::Optional<MatcherCtor>
602 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
603 return Registry::lookupMatcherCtor(MatcherName);
/// Builds the matcher for \p Ctor through the Registry. An empty \p BindID
/// selects Registry::constructMatcher; a non-empty one selects
/// Registry::constructBoundMatcher, which also receives the ID.
606 VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
607 MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
608 ArrayRef<ParserValue> Args, Diagnostics *Error) {
609 if (BindID.empty()) {
610 return Registry::constructMatcher(Ctor, NameRange, Args, Error);
612 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
/// Forwards \p Context to Registry::getAcceptedCompletionTypes and returns
/// its result.
617 std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
618 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
619 return Registry::getAcceptedCompletionTypes(Context);
/// Forwards \p AcceptedTypes to Registry::getMatcherCompletions and returns
/// its result.
622 std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
623 ArrayRef<ArgKind> AcceptedTypes) {
624 return Registry::getMatcherCompletions(AcceptedTypes);
/// Parses \p Code as a single expression into \p Value, using a fresh
/// tokenizer and a temporary Parser. Any token left over after the expression
/// is reported as ET_ParserTrailingCode.
627 bool Parser::parseExpression(StringRef Code, Sema *S,
628 const NamedValueMap *NamedValues,
629 VariantValue *Value, Diagnostics *Error) {
630 CodeTokenizer Tokenizer(Code, Error);
631 if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
// The expression must be followed directly by the end of the input.
633 if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
634 Error->addError(Tokenizer.peekNextToken().Range,
635 Error->ET_ParserTrailingCode);
/// Returns the completions available at \p CompletionOffset within \p Code,
/// ordered by descending specificity and then by ascending typed text.
641 std::vector<MatcherCompletion>
642 Parser::completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S,
643 const NamedValueMap *NamedValues) {
645 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
646 Parser P(&Tokenizer, S, NamedValues, &Error);
// The parse result is ignored; the parse is run so that P.Completions gets
// filled in.
648 P.parseExpressionImpl(&Dummy);
650 // Sort by specificity, then by name.
651 llvm::sort(P.Completions,
652 [](const MatcherCompletion &A, const MatcherCompletion &B) {
653 if (A.Specificity != B.Specificity)
654 return A.Specificity > B.Specificity;
655 return A.TypedText < B.TypedText;
658 return P.Completions;
/// Parses \p Code via parseExpression() and extracts a single matcher from
/// the result.
///
/// Returns an empty Optional when parsing fails or when the parsed value is
/// not a matcher (reported as ET_ParserNotAMatcher). A matcher value with no
/// single matcher is reported as ET_ParserOverloadedType together with the
/// value's type string.
661 llvm::Optional<DynTypedMatcher>
662 Parser::parseMatcherExpression(StringRef Code, Sema *S,
663 const NamedValueMap *NamedValues,
664 Diagnostics *Error) {
666 if (!parseExpression(Code, S, NamedValues, &Value, Error))
667 return llvm::Optional<DynTypedMatcher>();
668 if (!Value.isMatcher()) {
669 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
670 return llvm::Optional<DynTypedMatcher>();
672 llvm::Optional<DynTypedMatcher> Result =
673 Value.getMatcher().getSingleMatcher();
674 if (!Result.hasValue()) {
675 Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
676 << Value.getTypeAsString();
681 } // namespace dynamic
682 } // namespace ast_matchers