1 //===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief Recursive parser implementation for the matcher expression grammar.
13 //===----------------------------------------------------------------------===//
15 #include "clang/ASTMatchers/Dynamic/Parser.h"
16 #include "clang/ASTMatchers/Dynamic/Registry.h"
17 #include "clang/Basic/CharInfo.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/Support/ManagedStatic.h"
24 namespace ast_matchers {
27 /// \brief Simple structure to hold information for one token from the parser.
28 struct Parser::TokenInfo {
29 /// \brief Different possible tokens.
43 /// \brief Some known identifiers.
44 static const char* const ID_Bind;
46 TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {}
54 const char* const Parser::TokenInfo::ID_Bind = "bind";
56 /// \brief Simple tokenizer for the parser.
57 class Parser::CodeTokenizer {
59 explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
60 : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
61 CodeCompletionLocation(nullptr) {
62 NextToken = getNextToken();
65 CodeTokenizer(StringRef MatcherCode, Diagnostics *Error,
66 unsigned CodeCompletionOffset)
67 : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
68 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
69 NextToken = getNextToken();
72 /// \brief Returns but doesn't consume the next token.
73 const TokenInfo &peekNextToken() const { return NextToken; }
75 /// \brief Consumes and returns the next token.
76 TokenInfo consumeNextToken() {
77 TokenInfo ThisToken = NextToken;
78 NextToken = getNextToken();
82 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
85 TokenInfo getNextToken() {
88 Result.Range.Start = currentLocation();
90 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
91 Result.Kind = TokenInfo::TK_CodeCompletion;
92 Result.Text = StringRef(CodeCompletionLocation, 0);
93 CodeCompletionLocation = nullptr;
98 Result.Kind = TokenInfo::TK_Eof;
105 Result.Kind = TokenInfo::TK_Comma;
106 Result.Text = Code.substr(0, 1);
107 Code = Code.drop_front();
110 Result.Kind = TokenInfo::TK_Period;
111 Result.Text = Code.substr(0, 1);
112 Code = Code.drop_front();
115 Result.Kind = TokenInfo::TK_OpenParen;
116 Result.Text = Code.substr(0, 1);
117 Code = Code.drop_front();
120 Result.Kind = TokenInfo::TK_CloseParen;
121 Result.Text = Code.substr(0, 1);
122 Code = Code.drop_front();
127 // Parse a string literal.
128 consumeStringLiteral(&Result);
131 case '0': case '1': case '2': case '3': case '4':
132 case '5': case '6': case '7': case '8': case '9':
133 // Parse an unsigned and float literal.
134 consumeNumberLiteral(&Result);
138 if (isAlphanumeric(Code[0])) {
139 // Parse an identifier
140 size_t TokenLength = 1;
142 // A code completion location in/immediately after an identifier will
143 // cause the portion of the identifier before the code completion
144 // location to become a code completion token.
145 if (CodeCompletionLocation == Code.data() + TokenLength) {
146 CodeCompletionLocation = nullptr;
147 Result.Kind = TokenInfo::TK_CodeCompletion;
148 Result.Text = Code.substr(0, TokenLength);
149 Code = Code.drop_front(TokenLength);
152 if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
156 if (TokenLength == 4 && Code.startswith("true")) {
157 Result.Kind = TokenInfo::TK_Literal;
159 } else if (TokenLength == 5 && Code.startswith("false")) {
160 Result.Kind = TokenInfo::TK_Literal;
161 Result.Value = false;
163 Result.Kind = TokenInfo::TK_Ident;
164 Result.Text = Code.substr(0, TokenLength);
166 Code = Code.drop_front(TokenLength);
168 Result.Kind = TokenInfo::TK_InvalidChar;
169 Result.Text = Code.substr(0, 1);
170 Code = Code.drop_front(1);
175 Result.Range.End = currentLocation();
179 /// \brief Consume an unsigned and float literal.
180 void consumeNumberLiteral(TokenInfo *Result) {
181 bool isFloatingLiteral = false;
183 if (Code.size() > 1) {
184 // Consume the 'x' or 'b' radix modifier, if present.
185 switch (toLowercase(Code[1])) {
186 case 'x': case 'b': Length = 2;
189 while (Length < Code.size() && isHexDigit(Code[Length]))
192 // Try to recognize a floating point literal.
193 while (Length < Code.size()) {
194 char c = Code[Length];
195 if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
196 isFloatingLiteral = true;
203 Result->Text = Code.substr(0, Length);
204 Code = Code.drop_front(Length);
206 if (isFloatingLiteral) {
209 std::string Text = Result->Text.str();
210 double doubleValue = strtod(Text.c_str(), &end);
211 if (*end == 0 && errno == 0) {
212 Result->Kind = TokenInfo::TK_Literal;
213 Result->Value = doubleValue;
218 if (!Result->Text.getAsInteger(0, Value)) {
219 Result->Kind = TokenInfo::TK_Literal;
220 Result->Value = Value;
226 Range.Start = Result->Range.Start;
227 Range.End = currentLocation();
228 Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
229 Result->Kind = TokenInfo::TK_Error;
232 /// \brief Consume a string literal.
234 /// \c Code must be positioned at the start of the literal (the opening
235 /// quote). Consumed until it finds the same closing quote character.
236 void consumeStringLiteral(TokenInfo *Result) {
237 bool InEscape = false;
238 const char Marker = Code[0];
239 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
244 if (Code[Length] == '\\') {
248 if (Code[Length] == Marker) {
249 Result->Kind = TokenInfo::TK_Literal;
250 Result->Text = Code.substr(0, Length + 1);
251 Result->Value = Code.substr(1, Length - 1);
252 Code = Code.drop_front(Length + 1);
257 StringRef ErrorText = Code;
258 Code = Code.drop_front(Code.size());
260 Range.Start = Result->Range.Start;
261 Range.End = currentLocation();
262 Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
263 Result->Kind = TokenInfo::TK_Error;
266 /// \brief Consume all leading whitespace from \c Code.
267 void consumeWhitespace() {
268 while (!Code.empty() && isWhitespace(Code[0])) {
269 if (Code[0] == '\n') {
271 StartOfLine = Code.drop_front();
273 Code = Code.drop_front();
277 SourceLocation currentLocation() {
278 SourceLocation Location;
279 Location.Line = Line;
280 Location.Column = Code.data() - StartOfLine.data() + 1;
285 StringRef StartOfLine;
289 const char *CodeCompletionLocation;
292 Parser::Sema::~Sema() {}
294 std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
295 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
296 return std::vector<ArgKind>();
299 std::vector<MatcherCompletion>
300 Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
301 return std::vector<MatcherCompletion>();
304 struct Parser::ScopedContextEntry {
307 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
308 P->ContextStack.push_back(std::make_pair(C, 0u));
311 ~ScopedContextEntry() {
312 P->ContextStack.pop_back();
316 ++P->ContextStack.back().second;
320 /// \brief Parse expressions that start with an identifier.
322 /// This function can parse named values and matchers.
323 /// In case of failure it will try to determine the user's intent to give
324 /// an appropriate error message.
325 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
326 const TokenInfo NameToken = Tokenizer->consumeNextToken();
328 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
329 // Parse as a named value.
330 if (const VariantValue NamedValue =
331 NamedValues ? NamedValues->lookup(NameToken.Text)
336 // If the syntax is correct and the name is not a matcher either, report
337 // unknown named value.
338 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
339 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
340 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
341 !S->lookupMatcherCtor(NameToken.Text)) {
342 Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
346 // Otherwise, fallback to the matcher parser.
349 // Parse as a matcher expression.
350 return parseMatcherExpressionImpl(NameToken, Value);
353 /// \brief Parse and validate a matcher expression.
354 /// \return \c true on success, in which case \c Value has the matcher parsed.
355 /// If the input is malformed, or some argument has an error, it
356 /// returns \c false.
357 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
358 VariantValue *Value) {
359 assert(NameToken.Kind == TokenInfo::TK_Ident);
360 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
361 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
362 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
367 llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
370 Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
372 // Do not return here. We need to continue to give completion suggestions.
375 std::vector<ParserValue> Args;
379 ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr);
381 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
382 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
384 EndToken = Tokenizer->consumeNextToken();
387 if (Args.size() > 0) {
388 // We must find a , token to continue.
389 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
390 if (CommaToken.Kind != TokenInfo::TK_Comma) {
391 Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
397 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
398 NameToken.Text, NameToken.Range,
400 ParserValue ArgValue;
401 ArgValue.Text = Tokenizer->peekNextToken().Text;
402 ArgValue.Range = Tokenizer->peekNextToken().Range;
403 if (!parseExpressionImpl(&ArgValue.Value)) {
407 Args.push_back(ArgValue);
412 if (EndToken.Kind == TokenInfo::TK_Eof) {
413 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
418 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
419 // Parse .bind("foo")
420 Tokenizer->consumeNextToken(); // consume the period.
421 const TokenInfo BindToken = Tokenizer->consumeNextToken();
422 if (BindToken.Kind == TokenInfo::TK_CodeCompletion) {
423 addCompletion(BindToken, MatcherCompletion("bind(\"", "bind", 1));
427 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
428 const TokenInfo IDToken = Tokenizer->consumeNextToken();
429 const TokenInfo CloseToken = Tokenizer->consumeNextToken();
431 // TODO: We could use different error codes for each/some to be more
432 // explicit about the syntax error.
433 if (BindToken.Kind != TokenInfo::TK_Ident ||
434 BindToken.Text != TokenInfo::ID_Bind) {
435 Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr);
438 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
439 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
442 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
443 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
446 if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
447 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
450 BindID = IDToken.Value.getString();
456 // Merge the start and end infos.
457 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
458 NameToken.Text, NameToken.Range);
459 SourceRange MatcherRange = NameToken.Range;
460 MatcherRange.End = EndToken.Range.End;
461 VariantMatcher Result = S->actOnMatcherExpression(
462 *Ctor, MatcherRange, BindID, Args, Error);
463 if (Result.isNull()) return false;
469 // If the prefix of this completion matches the completion token, add it to
470 // Completions minus the prefix.
471 void Parser::addCompletion(const TokenInfo &CompToken,
472 const MatcherCompletion& Completion) {
473 if (StringRef(Completion.TypedText).startswith(CompToken.Text) &&
474 Completion.Specificity > 0) {
475 Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
476 Completion.MatcherDecl, Completion.Specificity);
480 std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
481 ArrayRef<ArgKind> AcceptedTypes) {
482 if (!NamedValues) return std::vector<MatcherCompletion>();
483 std::vector<MatcherCompletion> Result;
484 for (const auto &Entry : *NamedValues) {
485 unsigned Specificity;
486 if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
488 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
489 Result.emplace_back(Entry.getKey(), Decl, Specificity);
495 void Parser::addExpressionCompletions() {
496 const TokenInfo CompToken = Tokenizer->consumeNextToken();
497 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
499 // We cannot complete code if there is an invalid element on the context
501 for (ContextStackTy::iterator I = ContextStack.begin(),
502 E = ContextStack.end();
508 auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
509 for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
510 addCompletion(CompToken, Completion);
513 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
514 addCompletion(CompToken, Completion);
518 /// \brief Parse an <Expresssion>
519 bool Parser::parseExpressionImpl(VariantValue *Value) {
520 switch (Tokenizer->nextTokenKind()) {
521 case TokenInfo::TK_Literal:
522 *Value = Tokenizer->consumeNextToken().Value;
525 case TokenInfo::TK_Ident:
526 return parseIdentifierPrefixImpl(Value);
528 case TokenInfo::TK_CodeCompletion:
529 addExpressionCompletions();
532 case TokenInfo::TK_Eof:
533 Error->addError(Tokenizer->consumeNextToken().Range,
534 Error->ET_ParserNoCode);
537 case TokenInfo::TK_Error:
538 // This error was already reported by the tokenizer.
541 case TokenInfo::TK_OpenParen:
542 case TokenInfo::TK_CloseParen:
543 case TokenInfo::TK_Comma:
544 case TokenInfo::TK_Period:
545 case TokenInfo::TK_InvalidChar:
546 const TokenInfo Token = Tokenizer->consumeNextToken();
547 Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text;
551 llvm_unreachable("Unknown token kind.");
554 static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
556 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
557 const NamedValueMap *NamedValues, Diagnostics *Error)
558 : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
559 NamedValues(NamedValues), Error(Error) {}
561 Parser::RegistrySema::~RegistrySema() {}
563 llvm::Optional<MatcherCtor>
564 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
565 return Registry::lookupMatcherCtor(MatcherName);
568 VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
569 MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
570 ArrayRef<ParserValue> Args, Diagnostics *Error) {
571 if (BindID.empty()) {
572 return Registry::constructMatcher(Ctor, NameRange, Args, Error);
574 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
579 std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
580 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
581 return Registry::getAcceptedCompletionTypes(Context);
584 std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
585 ArrayRef<ArgKind> AcceptedTypes) {
586 return Registry::getMatcherCompletions(AcceptedTypes);
589 bool Parser::parseExpression(StringRef Code, Sema *S,
590 const NamedValueMap *NamedValues,
591 VariantValue *Value, Diagnostics *Error) {
592 CodeTokenizer Tokenizer(Code, Error);
593 if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
595 if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
596 Error->addError(Tokenizer.peekNextToken().Range,
597 Error->ET_ParserTrailingCode);
603 std::vector<MatcherCompletion>
604 Parser::completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S,
605 const NamedValueMap *NamedValues) {
607 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
608 Parser P(&Tokenizer, S, NamedValues, &Error);
610 P.parseExpressionImpl(&Dummy);
612 // Sort by specificity, then by name.
613 std::sort(P.Completions.begin(), P.Completions.end(),
614 [](const MatcherCompletion &A, const MatcherCompletion &B) {
615 if (A.Specificity != B.Specificity)
616 return A.Specificity > B.Specificity;
617 return A.TypedText < B.TypedText;
620 return P.Completions;
623 llvm::Optional<DynTypedMatcher>
624 Parser::parseMatcherExpression(StringRef Code, Sema *S,
625 const NamedValueMap *NamedValues,
626 Diagnostics *Error) {
628 if (!parseExpression(Code, S, NamedValues, &Value, Error))
629 return llvm::Optional<DynTypedMatcher>();
630 if (!Value.isMatcher()) {
631 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
632 return llvm::Optional<DynTypedMatcher>();
634 llvm::Optional<DynTypedMatcher> Result =
635 Value.getMatcher().getSingleMatcher();
636 if (!Result.hasValue()) {
637 Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
638 << Value.getTypeAsString();
643 } // namespace dynamic
644 } // namespace ast_matchers