1 //===- Parser.cpp - Matcher expression parser -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// Recursive parser implementation for the matcher expression grammar.
13 //===----------------------------------------------------------------------===//
15 #include "clang/ASTMatchers/Dynamic/Parser.h"
16 #include "clang/ASTMatchers/ASTMatchersInternal.h"
17 #include "clang/ASTMatchers/Dynamic/Diagnostics.h"
18 #include "clang/ASTMatchers/Dynamic/Registry.h"
19 #include "clang/Basic/CharInfo.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/ManagedStatic.h"
34 namespace ast_matchers {
37 /// Simple structure to hold information for one token from the parser.
38 struct Parser::TokenInfo {
39 /// Different possible tokens.
53 /// Some known identifiers.
54 static const char* const ID_Bind;
56 TokenInfo() = default;
59 TokenKind Kind = TK_Eof;
64 const char* const Parser::TokenInfo::ID_Bind = "bind";
66 /// Simple tokenizer for the parser.
67 class Parser::CodeTokenizer {
69 explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
70 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
71 NextToken = getNextToken();
74 CodeTokenizer(StringRef MatcherCode, Diagnostics *Error,
75 unsigned CodeCompletionOffset)
76 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
77 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
78 NextToken = getNextToken();
81 /// Returns but doesn't consume the next token.
82 const TokenInfo &peekNextToken() const { return NextToken; }
84 /// Consumes and returns the next token.
85 TokenInfo consumeNextToken() {
86 TokenInfo ThisToken = NextToken;
87 NextToken = getNextToken();
91 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
94 TokenInfo getNextToken() {
97 Result.Range.Start = currentLocation();
99 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
100 Result.Kind = TokenInfo::TK_CodeCompletion;
101 Result.Text = StringRef(CodeCompletionLocation, 0);
102 CodeCompletionLocation = nullptr;
107 Result.Kind = TokenInfo::TK_Eof;
114 Result.Kind = TokenInfo::TK_Comma;
115 Result.Text = Code.substr(0, 1);
116 Code = Code.drop_front();
119 Result.Kind = TokenInfo::TK_Period;
120 Result.Text = Code.substr(0, 1);
121 Code = Code.drop_front();
124 Result.Kind = TokenInfo::TK_OpenParen;
125 Result.Text = Code.substr(0, 1);
126 Code = Code.drop_front();
129 Result.Kind = TokenInfo::TK_CloseParen;
130 Result.Text = Code.substr(0, 1);
131 Code = Code.drop_front();
136 // Parse a string literal.
137 consumeStringLiteral(&Result);
140 case '0': case '1': case '2': case '3': case '4':
141 case '5': case '6': case '7': case '8': case '9':
142 // Parse an unsigned and float literal.
143 consumeNumberLiteral(&Result);
147 if (isAlphanumeric(Code[0])) {
148 // Parse an identifier
149 size_t TokenLength = 1;
151 // A code completion location in/immediately after an identifier will
152 // cause the portion of the identifier before the code completion
153 // location to become a code completion token.
154 if (CodeCompletionLocation == Code.data() + TokenLength) {
155 CodeCompletionLocation = nullptr;
156 Result.Kind = TokenInfo::TK_CodeCompletion;
157 Result.Text = Code.substr(0, TokenLength);
158 Code = Code.drop_front(TokenLength);
161 if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
165 if (TokenLength == 4 && Code.startswith("true")) {
166 Result.Kind = TokenInfo::TK_Literal;
168 } else if (TokenLength == 5 && Code.startswith("false")) {
169 Result.Kind = TokenInfo::TK_Literal;
170 Result.Value = false;
172 Result.Kind = TokenInfo::TK_Ident;
173 Result.Text = Code.substr(0, TokenLength);
175 Code = Code.drop_front(TokenLength);
177 Result.Kind = TokenInfo::TK_InvalidChar;
178 Result.Text = Code.substr(0, 1);
179 Code = Code.drop_front(1);
184 Result.Range.End = currentLocation();
188 /// Consume an unsigned and float literal.
189 void consumeNumberLiteral(TokenInfo *Result) {
190 bool isFloatingLiteral = false;
192 if (Code.size() > 1) {
193 // Consume the 'x' or 'b' radix modifier, if present.
194 switch (toLowercase(Code[1])) {
195 case 'x': case 'b': Length = 2;
198 while (Length < Code.size() && isHexDigit(Code[Length]))
201 // Try to recognize a floating point literal.
202 while (Length < Code.size()) {
203 char c = Code[Length];
204 if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
205 isFloatingLiteral = true;
212 Result->Text = Code.substr(0, Length);
213 Code = Code.drop_front(Length);
215 if (isFloatingLiteral) {
218 std::string Text = Result->Text.str();
219 double doubleValue = strtod(Text.c_str(), &end);
220 if (*end == 0 && errno == 0) {
221 Result->Kind = TokenInfo::TK_Literal;
222 Result->Value = doubleValue;
227 if (!Result->Text.getAsInteger(0, Value)) {
228 Result->Kind = TokenInfo::TK_Literal;
229 Result->Value = Value;
235 Range.Start = Result->Range.Start;
236 Range.End = currentLocation();
237 Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
238 Result->Kind = TokenInfo::TK_Error;
241 /// Consume a string literal.
243 /// \c Code must be positioned at the start of the literal (the opening
244 /// quote). Consumed until it finds the same closing quote character.
245 void consumeStringLiteral(TokenInfo *Result) {
246 bool InEscape = false;
247 const char Marker = Code[0];
248 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
253 if (Code[Length] == '\\') {
257 if (Code[Length] == Marker) {
258 Result->Kind = TokenInfo::TK_Literal;
259 Result->Text = Code.substr(0, Length + 1);
260 Result->Value = Code.substr(1, Length - 1);
261 Code = Code.drop_front(Length + 1);
266 StringRef ErrorText = Code;
267 Code = Code.drop_front(Code.size());
269 Range.Start = Result->Range.Start;
270 Range.End = currentLocation();
271 Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
272 Result->Kind = TokenInfo::TK_Error;
275 /// Consume all leading whitespace from \c Code.
276 void consumeWhitespace() {
277 while (!Code.empty() && isWhitespace(Code[0])) {
278 if (Code[0] == '\n') {
280 StartOfLine = Code.drop_front();
282 Code = Code.drop_front();
286 SourceLocation currentLocation() {
287 SourceLocation Location;
288 Location.Line = Line;
289 Location.Column = Code.data() - StartOfLine.data() + 1;
294 StringRef StartOfLine;
298 const char *CodeCompletionLocation = nullptr;
301 Parser::Sema::~Sema() = default;
303 std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
304 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
308 std::vector<MatcherCompletion>
309 Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
313 struct Parser::ScopedContextEntry {
316 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
317 P->ContextStack.push_back(std::make_pair(C, 0u));
320 ~ScopedContextEntry() {
321 P->ContextStack.pop_back();
325 ++P->ContextStack.back().second;
329 /// Parse expressions that start with an identifier.
331 /// This function can parse named values and matchers.
332 /// In case of failure it will try to determine the user's intent to give
333 /// an appropriate error message.
334 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
335 const TokenInfo NameToken = Tokenizer->consumeNextToken();
337 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
338 // Parse as a named value.
339 if (const VariantValue NamedValue =
340 NamedValues ? NamedValues->lookup(NameToken.Text)
345 // If the syntax is correct and the name is not a matcher either, report
346 // unknown named value.
347 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
348 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
349 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
350 !S->lookupMatcherCtor(NameToken.Text)) {
351 Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
355 // Otherwise, fallback to the matcher parser.
358 // Parse as a matcher expression.
359 return parseMatcherExpressionImpl(NameToken, Value);
362 /// Parse and validate a matcher expression.
363 /// \return \c true on success, in which case \c Value has the matcher parsed.
364 /// If the input is malformed, or some argument has an error, it
365 /// returns \c false.
366 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
367 VariantValue *Value) {
368 assert(NameToken.Kind == TokenInfo::TK_Ident);
369 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
370 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
371 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
376 llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
379 Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
381 // Do not return here. We need to continue to give completion suggestions.
384 std::vector<ParserValue> Args;
388 ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr);
390 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
391 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
393 EndToken = Tokenizer->consumeNextToken();
397 // We must find a , token to continue.
398 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
399 if (CommaToken.Kind != TokenInfo::TK_Comma) {
400 Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
406 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
407 NameToken.Text, NameToken.Range,
409 ParserValue ArgValue;
410 ArgValue.Text = Tokenizer->peekNextToken().Text;
411 ArgValue.Range = Tokenizer->peekNextToken().Range;
412 if (!parseExpressionImpl(&ArgValue.Value)) {
416 Args.push_back(ArgValue);
421 if (EndToken.Kind == TokenInfo::TK_Eof) {
422 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
427 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
428 // Parse .bind("foo")
429 Tokenizer->consumeNextToken(); // consume the period.
430 const TokenInfo BindToken = Tokenizer->consumeNextToken();
431 if (BindToken.Kind == TokenInfo::TK_CodeCompletion) {
432 addCompletion(BindToken, MatcherCompletion("bind(\"", "bind", 1));
436 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
437 const TokenInfo IDToken = Tokenizer->consumeNextToken();
438 const TokenInfo CloseToken = Tokenizer->consumeNextToken();
440 // TODO: We could use different error codes for each/some to be more
441 // explicit about the syntax error.
442 if (BindToken.Kind != TokenInfo::TK_Ident ||
443 BindToken.Text != TokenInfo::ID_Bind) {
444 Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr);
447 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
448 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
451 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
452 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
455 if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
456 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
459 BindID = IDToken.Value.getString();
465 // Merge the start and end infos.
466 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
467 NameToken.Text, NameToken.Range);
468 SourceRange MatcherRange = NameToken.Range;
469 MatcherRange.End = EndToken.Range.End;
470 VariantMatcher Result = S->actOnMatcherExpression(
471 *Ctor, MatcherRange, BindID, Args, Error);
472 if (Result.isNull()) return false;
478 // If the prefix of this completion matches the completion token, add it to
479 // Completions minus the prefix.
480 void Parser::addCompletion(const TokenInfo &CompToken,
481 const MatcherCompletion& Completion) {
482 if (StringRef(Completion.TypedText).startswith(CompToken.Text) &&
483 Completion.Specificity > 0) {
484 Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
485 Completion.MatcherDecl, Completion.Specificity);
489 std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
490 ArrayRef<ArgKind> AcceptedTypes) {
491 if (!NamedValues) return std::vector<MatcherCompletion>();
492 std::vector<MatcherCompletion> Result;
493 for (const auto &Entry : *NamedValues) {
494 unsigned Specificity;
495 if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
497 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
498 Result.emplace_back(Entry.getKey(), Decl, Specificity);
504 void Parser::addExpressionCompletions() {
505 const TokenInfo CompToken = Tokenizer->consumeNextToken();
506 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
508 // We cannot complete code if there is an invalid element on the context
510 for (ContextStackTy::iterator I = ContextStack.begin(),
511 E = ContextStack.end();
517 auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
518 for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
519 addCompletion(CompToken, Completion);
522 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
523 addCompletion(CompToken, Completion);
527 /// Parse an <Expression>
528 bool Parser::parseExpressionImpl(VariantValue *Value) {
529 switch (Tokenizer->nextTokenKind()) {
530 case TokenInfo::TK_Literal:
531 *Value = Tokenizer->consumeNextToken().Value;
534 case TokenInfo::TK_Ident:
535 return parseIdentifierPrefixImpl(Value);
537 case TokenInfo::TK_CodeCompletion:
538 addExpressionCompletions();
541 case TokenInfo::TK_Eof:
542 Error->addError(Tokenizer->consumeNextToken().Range,
543 Error->ET_ParserNoCode);
546 case TokenInfo::TK_Error:
547 // This error was already reported by the tokenizer.
550 case TokenInfo::TK_OpenParen:
551 case TokenInfo::TK_CloseParen:
552 case TokenInfo::TK_Comma:
553 case TokenInfo::TK_Period:
554 case TokenInfo::TK_InvalidChar:
555 const TokenInfo Token = Tokenizer->consumeNextToken();
556 Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text;
560 llvm_unreachable("Unknown token kind.");
563 static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
565 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
566 const NamedValueMap *NamedValues, Diagnostics *Error)
567 : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
568 NamedValues(NamedValues), Error(Error) {}
570 Parser::RegistrySema::~RegistrySema() = default;
572 llvm::Optional<MatcherCtor>
573 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
574 return Registry::lookupMatcherCtor(MatcherName);
577 VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
578 MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
579 ArrayRef<ParserValue> Args, Diagnostics *Error) {
580 if (BindID.empty()) {
581 return Registry::constructMatcher(Ctor, NameRange, Args, Error);
583 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
588 std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
589 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
590 return Registry::getAcceptedCompletionTypes(Context);
593 std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
594 ArrayRef<ArgKind> AcceptedTypes) {
595 return Registry::getMatcherCompletions(AcceptedTypes);
598 bool Parser::parseExpression(StringRef Code, Sema *S,
599 const NamedValueMap *NamedValues,
600 VariantValue *Value, Diagnostics *Error) {
601 CodeTokenizer Tokenizer(Code, Error);
602 if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
604 if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
605 Error->addError(Tokenizer.peekNextToken().Range,
606 Error->ET_ParserTrailingCode);
612 std::vector<MatcherCompletion>
613 Parser::completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S,
614 const NamedValueMap *NamedValues) {
616 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
617 Parser P(&Tokenizer, S, NamedValues, &Error);
619 P.parseExpressionImpl(&Dummy);
621 // Sort by specificity, then by name.
622 llvm::sort(P.Completions.begin(), P.Completions.end(),
623 [](const MatcherCompletion &A, const MatcherCompletion &B) {
624 if (A.Specificity != B.Specificity)
625 return A.Specificity > B.Specificity;
626 return A.TypedText < B.TypedText;
629 return P.Completions;
632 llvm::Optional<DynTypedMatcher>
633 Parser::parseMatcherExpression(StringRef Code, Sema *S,
634 const NamedValueMap *NamedValues,
635 Diagnostics *Error) {
637 if (!parseExpression(Code, S, NamedValues, &Value, Error))
638 return llvm::Optional<DynTypedMatcher>();
639 if (!Value.isMatcher()) {
640 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
641 return llvm::Optional<DynTypedMatcher>();
643 llvm::Optional<DynTypedMatcher> Result =
644 Value.getMatcher().getSingleMatcher();
645 if (!Result.hasValue()) {
646 Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
647 << Value.getTypeAsString();
652 } // namespace dynamic
653 } // namespace ast_matchers