1 //===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief Recursive parser implementation for the matcher expression grammar.
13 //===----------------------------------------------------------------------===//
15 #include "clang/ASTMatchers/Dynamic/Parser.h"
16 #include "clang/ASTMatchers/Dynamic/Registry.h"
17 #include "clang/Basic/CharInfo.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/Support/ManagedStatic.h"
24 namespace ast_matchers {
27 /// \brief Simple structure to hold information for one token from the parser.
///
/// NOTE(review): only part of this struct is visible in this excerpt (the
/// embedded listing numbers skip several lines). The TokenKind enumerators and
/// the data members initialised by the constructor are not shown here; confirm
/// them against the full file.
28 struct Parser::TokenInfo {
29 /// \brief Different possible tokens.
// The enumerator list is not visible here. Other code in this file refers to
// TK_Eof, TK_Comma, TK_Period, TK_OpenParen, TK_CloseParen, TK_Literal,
// TK_Ident, TK_InvalidChar, TK_Error and TK_CodeCompletion.
43 /// \brief Some known identifiers.
// Declaration only; the out-of-class definition assigns the spelling "bind".
44 static const char* const ID_Bind;
// Default token: Text, Range and Value are value-initialised and Kind is
// TK_Eof.
46 TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {}
/// Out-of-class definition of TokenInfo::ID_Bind. parseMatcherExpressionImpl
/// compares the identifier that follows a '.' against this spelling.
54 const char* const Parser::TokenInfo::ID_Bind = "bind";
56 /// \brief Simple tokenizer for the parser.
///
/// Keeps one token of lookahead in NextToken; every constructor fills it by
/// calling getNextToken() once.
57 class Parser::CodeTokenizer {
/// Builds a tokenizer over \p MatcherCode without a code-completion point
/// (CodeCompletionLocation is null, so getNextToken() never emits a
/// TK_CodeCompletion token).
///
/// \param MatcherCode Text to tokenize. Both Code and StartOfLine start out
///        referring to the whole input, and Line starts at 1.
/// \param Error Diagnostics object stored for later error reporting.
59 explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
60 : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
61 CodeCompletionLocation(nullptr) {
// Prime the one-token lookahead so peekNextToken() is usable right away.
62 NextToken = getNextToken();
/// Builds a tokenizer over \p MatcherCode with a code-completion point.
///
/// \param MatcherCode Text to tokenize.
/// \param Error Diagnostics object stored for later error reporting.
/// \param CodeCompletionOffset Added to MatcherCode.data() to form the
///        completion location.
///        NOTE(review): no range check on the offset is visible here;
///        presumably callers pass an offset within MatcherCode. Confirm.
65 CodeTokenizer(StringRef MatcherCode, Diagnostics *Error,
66 unsigned CodeCompletionOffset)
67 : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
68 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
// Prime the one-token lookahead, same as the other constructor.
69 NextToken = getNextToken();
72 /// \brief Returns but doesn't consume the next token.
///
/// Hands back a const reference to the cached lookahead token; the tokenizer
/// state is left untouched.
73 const TokenInfo &peekNextToken() const { return NextToken; }
75 /// \brief Consumes and returns the next token.
76 TokenInfo consumeNextToken() {
// Copy the cached lookahead first, then refill it with the token that follows.
77 TokenInfo ThisToken = NextToken;
78 NextToken = getNextToken();
// NOTE(review): the return statement is not visible in this excerpt;
// presumably ThisToken is returned. Confirm against the full file.
/// Kind of the cached lookahead token, without consuming it.
82 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
/// Scans one token from the front of Code and advances Code past it.
///
/// The token's Range starts at the location recorded before the scan and ends
/// at the location after it.
///
/// NOTE(review): several lines of this function are not visible in this
/// excerpt (the declaration of Result, the switch statement and most of its
/// case labels, the closing braces and the return). The comments below
/// describe only the visible statements.
85 TokenInfo getNextToken() {
88 Result.Range.Start = currentLocation();
// A pending completion point at or before the current position yields a
// zero-length TK_CodeCompletion token. The location is cleared afterwards so
// this branch is taken at most once.
90 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
91 Result.Kind = TokenInfo::TK_CodeCompletion;
92 Result.Text = StringRef(CodeCompletionLocation, 0);
93 CodeCompletionLocation = nullptr;
// NOTE(review): the condition guarding this assignment is not visible;
// presumably it is taken when Code is empty.
98 Result.Kind = TokenInfo::TK_Eof;
// Single-character punctuation tokens. Each branch records the one character
// as the token text and drops it from Code. The case labels are not visible
// in this excerpt.
105 Result.Kind = TokenInfo::TK_Comma;
106 Result.Text = Code.substr(0, 1);
107 Code = Code.drop_front();
110 Result.Kind = TokenInfo::TK_Period;
111 Result.Text = Code.substr(0, 1);
112 Code = Code.drop_front();
115 Result.Kind = TokenInfo::TK_OpenParen;
116 Result.Text = Code.substr(0, 1);
117 Code = Code.drop_front();
120 Result.Kind = TokenInfo::TK_CloseParen;
121 Result.Text = Code.substr(0, 1);
122 Code = Code.drop_front();
// Kind, Text and Value of the token are filled in by the helper.
127 // Parse a string literal.
128 consumeStringLiteral(&Result);
// Any leading decimal digit starts a numeric literal.
131 case '0': case '1': case '2': case '3': case '4':
132 case '5': case '6': case '7': case '8': case '9':
133 // Parse an unsigned literal.
134 consumeUnsignedLiteral(&Result);
// Identifier: the first character must be alphanumeric. TokenLength counts
// the characters accepted so far.
// NOTE(review): the loop that advances TokenLength is not visible here.
138 if (isAlphanumeric(Code[0])) {
139 // Parse an identifier
140 size_t TokenLength = 1;
142 // A code completion location in/immediately after an identifier will
143 // cause the portion of the identifier before the code completion
144 // location to become a code completion token.
145 if (CodeCompletionLocation == Code.data() + TokenLength) {
146 CodeCompletionLocation = nullptr;
147 Result.Kind = TokenInfo::TK_CodeCompletion;
148 Result.Text = Code.substr(0, TokenLength);
149 Code = Code.drop_front(TokenLength);
// The identifier ends at the end of the input or at the first character that
// is not alphanumeric.
152 if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
156 Result.Kind = TokenInfo::TK_Ident;
157 Result.Text = Code.substr(0, TokenLength);
158 Code = Code.drop_front(TokenLength);
// Anything else becomes a one-character TK_InvalidChar token. The character
// is still consumed.
160 Result.Kind = TokenInfo::TK_InvalidChar;
161 Result.Text = Code.substr(0, 1);
162 Code = Code.drop_front(1);
// Close the token's range at the position reached after consuming it.
167 Result.Range.End = currentLocation();
171 /// \brief Consume an unsigned literal.
///
/// On success \p Result becomes a TK_Literal carrying the parsed value. On
/// failure an ET_ParserUnsignedError diagnostic is added and \p Result becomes
/// TK_Error. In both cases the scanned text is removed from Code.
///
/// NOTE(review): the declarations of Length, Value and Range are not visible
/// in this excerpt.
172 void consumeUnsignedLiteral(TokenInfo *Result) {
174 if (Code.size() > 1) {
175 // Consume the 'x' or 'b' radix modifier, if present.
// The comparison is case-insensitive because Code[1] is lower-cased first.
176 switch (toLowercase(Code[1])) {
177 case 'x': case 'b': Length = 2;
// Characters are accepted with isHexDigit whatever the prefix was. Whether
// the text is a valid number is decided by getAsInteger below.
180 while (Length < Code.size() && isHexDigit(Code[Length]))
183 Result->Text = Code.substr(0, Length);
184 Code = Code.drop_front(Length);
// A false return from getAsInteger is treated as success here.
// NOTE(review): radix 0 presumably lets the prefix select the base; confirm
// against the StringRef documentation.
187 if (!Result->Text.getAsInteger(0, Value)) {
188 Result->Kind = TokenInfo::TK_Literal;
189 Result->Value = Value;
// Failure path: report the whole scanned text, from the token start to the
// current position.
192 Range.Start = Result->Range.Start;
193 Range.End = currentLocation();
194 Error->addError(Range, Error->ET_ParserUnsignedError) << Result->Text;
195 Result->Kind = TokenInfo::TK_Error;
199 /// \brief Consume a string literal.
201 /// \c Code must be positioned at the start of the literal (the opening
202 /// quote). Consumed until it finds the same closing quote character.
///
/// On success \p Result becomes a TK_Literal whose Text includes both quote
/// characters and whose Value is the text between them. If no closing quote
/// is found, the rest of the input is consumed, an ET_ParserStringError is
/// added and \p Result becomes TK_Error.
203 void consumeStringLiteral(TokenInfo *Result) {
204 bool InEscape = false;
// The closing quote must be the same character as the opening one.
205 const char Marker = Code[0];
// Scanning starts after the opening quote, at index 1.
206 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
// NOTE(review): the statements that set and clear InEscape are not visible
// in this excerpt; presumably a backslash makes the next character skip the
// closing-quote test. Confirm.
211 if (Code[Length] == '\\') {
215 if (Code[Length] == Marker) {
216 Result->Kind = TokenInfo::TK_Literal;
217 Result->Text = Code.substr(0, Length + 1);
// Value is the raw substring between the quotes; nothing visible here
// rewrites escape sequences.
218 Result->Value = Code.substr(1, Length - 1);
219 Code = Code.drop_front(Length + 1);
// No closing quote found: keep the remaining text for the diagnostic, then
// consume all of it so that tokenizing stops at the end of the input.
224 StringRef ErrorText = Code;
225 Code = Code.drop_front(Code.size());
227 Range.Start = Result->Range.Start;
228 Range.End = currentLocation();
229 Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
230 Result->Kind = TokenInfo::TK_Error;
233 /// \brief Consume all leading whitespace from \c Code.
///
/// Also keeps the line bookkeeping up to date when a newline is skipped.
234 void consumeWhitespace() {
235 while (!Code.empty() && isWhitespace(Code[0])) {
236 if (Code[0] == '\n') {
// The new line starts right after the newline character.
// NOTE(review): the statement between these two lines is not visible;
// presumably it increments Line. Confirm.
238 StartOfLine = Code.drop_front();
240 Code = Code.drop_front();
/// Builds the source location of the current read position.
///
/// Line is the tokenizer's current line counter. Column is the distance from
/// the start of the current line plus one, so columns are 1-based.
/// NOTE(review): the return statement is not visible in this excerpt;
/// presumably Location is returned.
244 SourceLocation currentLocation() {
245 SourceLocation Location;
246 Location.Line = Line;
247 Location.Column = Code.data() - StartOfLine.data() + 1;
// Input starting at the first character of the current line.
// currentLocation() measures columns from it and consumeWhitespace() moves it
// forward after each newline.
252 StringRef StartOfLine;
// Position in the input at which code completion was requested. It is nullptr
// when no completion was requested, and is reset to nullptr once the
// completion token has been produced.
256 const char *CodeCompletionLocation;
/// Out-of-line destructor with an empty body.
259 Parser::Sema::~Sema() {}
/// Implementation on Parser::Sema itself: \p Context is ignored and an empty
/// list of argument kinds is returned. RegistrySema has its own version that
/// forwards to Registry.
261 std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
262 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
263 return std::vector<ArgKind>();
/// Implementation on Parser::Sema itself: \p AcceptedTypes is ignored and an
/// empty list of completions is returned. RegistrySema has its own version
/// that forwards to Registry.
266 std::vector<MatcherCompletion>
267 Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
268 return std::vector<MatcherCompletion>();
/// Scope object that keeps the parser's ContextStack balanced: construction
/// pushes an entry and destruction pops it.
///
/// NOTE(review): the member declaration for P and the header of the function
/// containing the increment are not visible in this excerpt.
271 struct Parser::ScopedContextEntry {
// Pushes the pair (C, 0) onto the context stack of parser P.
274 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
275 P->ContextStack.push_back(std::make_pair(C, 0u));
// Removes the entry pushed by the constructor.
278 ~ScopedContextEntry() {
279 P->ContextStack.pop_back();
// Bumps the counter stored in the topmost entry.
// NOTE(review): presumably this counter is the index of the argument being
// parsed; confirm against the full file.
283 ++P->ContextStack.back().second;
287 /// \brief Parse expressions that start with an identifier.
289 /// This function can parse named values and matchers.
290 /// In case of failure it will try to determine the user's intent to give
291 /// an appropriate error message.
///
/// \param Value Output slot for the parsed value.
///
/// NOTE(review): parts of the named-value branch (the rest of the lookup
/// expression, the success path and some returns) are not visible in this
/// excerpt.
292 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
// The identifier itself is consumed up front; what follows it decides how it
// is interpreted.
293 const TokenInfo NameToken = Tokenizer->consumeNextToken();
// An identifier that is not followed by '(' is first tried as a named value.
295 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
296 // Parse as a named value.
// The lookup is only attempted when a NamedValues map was supplied.
297 if (const VariantValue NamedValue =
298 NamedValues ? NamedValues->lookup(NameToken.Text)
303 // If the syntax is correct and the name is not a matcher either, report
304 // unknown named value.
// "Correct syntax" here means the identifier is followed by ',', ')' or the
// end of the input.
305 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
306 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
307 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
308 !S->lookupMatcherCtor(NameToken.Text)) {
309 Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
313 // Otherwise, fallback to the matcher parser.
316 // Parse as a matcher expression.
317 return parseMatcherExpressionImpl(NameToken, Value);
320 /// \brief Parse and validate a matcher expression.
321 /// \return \c true on success, in which case \c Value has the matcher parsed.
322 /// If the input is malformed, or some argument has an error, it
323 /// returns \c false.
///
/// \param NameToken The already consumed identifier naming the matcher.
/// \param Value Output slot for the resulting matcher.
///
/// The visible steps are: consume '(', look up the matcher constructor, parse
/// comma-separated arguments up to ')', optionally parse a trailing
/// .bind("id"), then hand everything to Sema::actOnMatcherExpression.
///
/// NOTE(review): many lines are not visible in this excerpt (early returns,
/// the declarations of EndToken and BindID, loop exits, closing braces and
/// the final assignment to \p Value).
324 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
325 VariantValue *Value) {
326 assert(NameToken.Kind == TokenInfo::TK_Ident);
327 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
328 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
329 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
334 llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
337 Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
339 // Do not return here. We need to continue to give completion suggestions.
342 std::vector<ParserValue> Args;
// When the lookup failed, a null constructor is pushed so that the context
// stack still gets an entry for this matcher.
346 ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr);
// Argument loop: runs until ')' is consumed or the input ends.
348 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
349 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
351 EndToken = Tokenizer->consumeNextToken();
// Every argument after the first must be preceded by a comma.
354 if (Args.size() > 0) {
355 // We must find a , token to continue.
356 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
357 if (CommaToken.Kind != TokenInfo::TK_Comma) {
358 Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
364 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
365 NameToken.Text, NameToken.Range,
// Text and Range are taken from the argument's first token, before that
// argument is parsed.
367 ParserValue ArgValue;
368 ArgValue.Text = Tokenizer->peekNextToken().Text;
369 ArgValue.Range = Tokenizer->peekNextToken().Range;
370 if (!parseExpressionImpl(&ArgValue.Value)) {
374 Args.push_back(ArgValue);
// EndToken still being TK_Eof means no ')' was consumed. The error is
// reported at the opening parenthesis.
379 if (EndToken.Kind == TokenInfo::TK_Eof) {
380 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
385 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
386 // Parse .bind("foo")
387 Tokenizer->consumeNextToken(); // consume the period.
388 const TokenInfo BindToken = Tokenizer->consumeNextToken();
// A completion point right after the '.' offers the text bind(" as a
// completion.
389 if (BindToken.Kind == TokenInfo::TK_CodeCompletion) {
390 addCompletion(BindToken, MatcherCompletion("bind(\"", "bind", 1));
// The next three tokens are consumed unconditionally and validated
// afterwards. This OpenToken shadows the one declared for the matcher's
// own '('.
394 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
395 const TokenInfo IDToken = Tokenizer->consumeNextToken();
396 const TokenInfo CloseToken = Tokenizer->consumeNextToken();
398 // TODO: We could use different error codes for each/some to be more
399 // explicit about the syntax error.
// All four checks report the same ET_ParserMalformedBindExpr, each at the
// range of the token that failed.
400 if (BindToken.Kind != TokenInfo::TK_Ident ||
401 BindToken.Text != TokenInfo::ID_Bind) {
402 Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr);
405 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
406 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
// The bind id must be a literal that holds a string.
409 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
410 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
413 if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
414 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
417 BindID = IDToken.Value.getString();
423 // Merge the start and end infos.
424 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
425 NameToken.Text, NameToken.Range);
// The matcher's range runs from the start of its name to the end of the ')'
// token.
426 SourceRange MatcherRange = NameToken.Range;
427 MatcherRange.End = EndToken.Range.End;
// NOTE(review): Ctor is dereferenced here although the lookup above may have
// failed. A guard for that case is presumably among the lines not visible in
// this excerpt. Confirm.
428 VariantMatcher Result = S->actOnMatcherExpression(
429 *Ctor, MatcherRange, BindID, Args, Error);
430 if (Result.isNull()) return false;
436 // If the prefix of this completion matches the completion token, add it to
437 // Completions minus the prefix.
//
// \param CompToken The completion token; its Text is what was typed so far.
// \param Completion The candidate to filter and store.
//
// Candidates whose Specificity is 0 are never added.
438 void Parser::addCompletion(const TokenInfo &CompToken,
439 const MatcherCompletion& Completion) {
440 if (StringRef(Completion.TypedText).startswith(CompToken.Text) &&
441 Completion.Specificity > 0) {
// Only the part of TypedText after the typed prefix is stored. MatcherDecl
// and Specificity are copied unchanged.
442 Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
443 Completion.MatcherDecl, Completion.Specificity);
/// Collects completions for the named values convertible to one of
/// \p AcceptedTypes.
///
/// \return An empty list when the parser has no NamedValues map. Otherwise
///         one completion per convertible entry, with the entry's key as the
///         typed text.
///
/// NOTE(review): the declaration of Decl and the final return are not visible
/// in this excerpt.
447 std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
448 ArrayRef<ArgKind> AcceptedTypes) {
449 if (!NamedValues) return std::vector<MatcherCompletion>();
450 std::vector<MatcherCompletion> Result;
451 for (const auto &Entry : *NamedValues) {
// Specificity is filled in by isConvertibleTo and is only used when the
// conversion is possible.
452 unsigned Specificity;
453 if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
// The text built here is the value's type name, a space, then the key.
455 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
456 Result.emplace_back(Entry.getKey(), Decl, Specificity);
/// Consumes the pending code-completion token and adds every matcher and
/// named-value completion that fits the current context.
///
/// NOTE(review): the body of the context-stack loop is not visible in this
/// excerpt; per the comment below it presumably returns early when an entry
/// is invalid.
462 void Parser::addExpressionCompletions() {
463 const TokenInfo CompToken = Tokenizer->consumeNextToken();
464 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
466 // We cannot complete code if there is an invalid element on the context
468 for (ContextStackTy::iterator I = ContextStack.begin(),
469 E = ContextStack.end();
// Ask Sema which argument kinds are acceptable at this position, then offer
// matchers first and named values second. addCompletion filters each
// candidate against the typed prefix.
475 auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
476 for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
477 addCompletion(CompToken, Completion);
480 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
481 addCompletion(CompToken, Completion);
485 /// \brief Parse an <Expression>
///
/// Dispatches on the kind of the next token.
///
/// \param Value Output slot for the parsed value.
///
/// NOTE(review): the return statements that follow most cases are not visible
/// in this excerpt.
486 bool Parser::parseExpressionImpl(VariantValue *Value) {
487 switch (Tokenizer->nextTokenKind()) {
// A literal is the whole expression: take the value the tokenizer computed.
488 case TokenInfo::TK_Literal:
489 *Value = Tokenizer->consumeNextToken().Value;
// An identifier starts either a named value or a matcher expression.
492 case TokenInfo::TK_Ident:
493 return parseIdentifierPrefixImpl(Value);
// Completion point: collect the candidates.
495 case TokenInfo::TK_CodeCompletion:
496 addExpressionCompletions();
// End of input where an expression was expected.
499 case TokenInfo::TK_Eof:
500 Error->addError(Tokenizer->consumeNextToken().Range,
501 Error->ET_ParserNoCode);
504 case TokenInfo::TK_Error:
505 // This error was already reported by the tokenizer.
// Punctuation and invalid characters cannot start an expression. The
// offending token is consumed and reported together with its text.
508 case TokenInfo::TK_OpenParen:
509 case TokenInfo::TK_CloseParen:
510 case TokenInfo::TK_Comma:
511 case TokenInfo::TK_Period:
512 case TokenInfo::TK_InvalidChar:
513 const TokenInfo Token = Tokenizer->consumeNextToken();
514 Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text;
518 llvm_unreachable("Unknown token kind.");
/// File-local fallback Sema; the Parser constructor uses it when it is given
/// a null Sema pointer.
521 static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
/// Stores the collaborators used while parsing.
///
/// \param Tokenizer Token source.
/// \param S Semantic callbacks; when null, DefaultRegistrySema is used.
/// \param NamedValues Optional map of named values. Code that reads it
///        checks for null first.
/// \param Error Diagnostics object that receives parse errors.
523 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
524 const NamedValueMap *NamedValues, Diagnostics *Error)
525 : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
526 NamedValues(NamedValues), Error(Error) {}
/// Out-of-line destructor with an empty body.
528 Parser::RegistrySema::~RegistrySema() {}
/// Forwards the lookup of \p MatcherName to Registry::lookupMatcherCtor and
/// returns its result unchanged.
530 llvm::Optional<MatcherCtor>
531 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
532 return Registry::lookupMatcherCtor(MatcherName);
/// Builds the matcher through the Registry.
///
/// An empty \p BindID selects Registry::constructMatcher. Otherwise
/// Registry::constructBoundMatcher is called with \p BindID.
///
/// NOTE(review): the tail of the bound-matcher call is not visible in this
/// excerpt.
535 VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
536 MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
537 ArrayRef<ParserValue> Args, Diagnostics *Error) {
538 if (BindID.empty()) {
539 return Registry::constructMatcher(Ctor, NameRange, Args, Error);
541 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
/// Forwards \p Context to Registry::getAcceptedCompletionTypes and returns
/// its result unchanged.
546 std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
547 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
548 return Registry::getAcceptedCompletionTypes(Context);
/// Forwards \p AcceptedTypes to Registry::getMatcherCompletions and returns
/// its result unchanged.
551 std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
552 ArrayRef<ArgKind> AcceptedTypes) {
553 return Registry::getMatcherCompletions(AcceptedTypes);
/// Parses \p Code as a single expression and stores the result in \p Value.
///
/// \param Code Text to parse.
/// \param S Semantic callbacks; may be null (the Parser constructor then
///        uses its default).
/// \param NamedValues Optional map of named values.
/// \param Value Output slot for the parsed value.
/// \param Error Diagnostics object that receives parse errors.
///
/// Tokens left over after the expression are reported as
/// ET_ParserTrailingCode.
///
/// NOTE(review): the return statements are not visible in this excerpt.
556 bool Parser::parseExpression(StringRef Code, Sema *S,
557 const NamedValueMap *NamedValues,
558 VariantValue *Value, Diagnostics *Error) {
// This overload has no completion offset, so no completion token is produced.
559 CodeTokenizer Tokenizer(Code, Error);
560 if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
// A successful parse must have consumed the whole input.
562 if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
563 Error->addError(Tokenizer.peekNextToken().Range,
564 Error->ET_ParserTrailingCode);
/// Computes the completions available at \p CompletionOffset within \p Code.
///
/// The code is parsed with a tokenizer that has a completion point. The
/// result of the parse call is ignored; only the completions collected along
/// the way are returned.
///
/// \return The collected completions, ordered by descending Specificity and
///         then by ascending TypedText.
///
/// NOTE(review): the declarations of the local Error and Dummy objects are
/// not visible in this excerpt.
570 std::vector<MatcherCompletion>
571 Parser::completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S,
572 const NamedValueMap *NamedValues) {
574 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
575 Parser P(&Tokenizer, S, NamedValues, &Error);
577 P.parseExpressionImpl(&Dummy);
579 // Sort by specificity, then by name.
// Higher Specificity comes first; equal Specificity falls back to TypedText
// in ascending order.
580 std::sort(P.Completions.begin(), P.Completions.end(),
581 [](const MatcherCompletion &A, const MatcherCompletion &B) {
582 if (A.Specificity != B.Specificity)
583 return A.Specificity > B.Specificity;
584 return A.TypedText < B.TypedText;
587 return P.Completions;
/// Parses \p Code and requires the result to be exactly one matcher.
///
/// \return An empty Optional when parsing fails or when the parsed value is
///         not a matcher (ET_ParserNotAMatcher is added in that case). When
///         getSingleMatcher() yields no value, ET_ParserOverloadedType is
///         added together with the value's type string.
///
/// NOTE(review): the declaration of Value and the final return are not
/// visible in this excerpt.
590 llvm::Optional<DynTypedMatcher>
591 Parser::parseMatcherExpression(StringRef Code, Sema *S,
592 const NamedValueMap *NamedValues,
593 Diagnostics *Error) {
595 if (!parseExpression(Code, S, NamedValues, &Value, Error))
596 return llvm::Optional<DynTypedMatcher>();
// These diagnostics are added with a default-constructed SourceRange.
597 if (!Value.isMatcher()) {
598 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
599 return llvm::Optional<DynTypedMatcher>();
601 llvm::Optional<DynTypedMatcher> Result =
602 Value.getMatcher().getSingleMatcher();
603 if (!Result.hasValue()) {
604 Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
605 << Value.getTypeAsString();
610 } // namespace dynamic
611 } // namespace ast_matchers