1 //===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "CPlusPlusNameParser.h"
12 #include "clang/Basic/IdentifierTable.h"
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/Support/Threading.h"
17 using namespace lldb_private;
20 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
21 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
22 namespace tok = clang::tok;
// Entry point for interpreting the whole token stream as a function
// definition. The cursor is reset to the first token and three
// interpretations are attempted in order: no return type, function-pointer
// return type, and finally an ordinary (non-function) return type.
24 Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
25 m_next_token_index = 0;
26 Optional<ParsedFunction> result(None);
28 // Try to parse the name as function without a return type specified
29 // e.g. main(int, char*[])
// The bookmark captures the token index before this first attempt.
31 Bookmark start_position = SetBookmark();
32 result = ParseFunctionImpl(false);
// The first attempt is only accepted when it succeeded AND used every token.
33 if (result && !HasMoreTokens())
37 // Try to parse the name as function with function pointer return type
38 // e.g. void (*get_func(const char*))()
39 result = ParseFuncPtr(true);
43 // Finally try to parse the name as a function with non-function return type
44 // e.g. int main(int, char*[])
45 result = ParseFunctionImpl(true);
// Interprets the token stream, starting from the first token, as a (possibly
// qualified) name and exposes its basename and context as text.
51 Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
52 m_next_token_index = 0;
53 Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
// Convert the token-index ranges produced by the parser into text slices.
59 result.basename = GetTextForRange(name_ranges.getValue().basename_range);
60 result.context = GetTextForRange(name_ranges.getValue().context_range);
// Returns true while the cursor index is still inside the lexed token list.
64 bool CPlusPlusNameParser::HasMoreTokens() {
65 return m_next_token_index < m_tokens.size();
// Moves the cursor forward by one token; no bounds check is performed here.
68 void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
// Moves the cursor back by one token; no bounds check is performed here.
70 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
// Single-kind overload of ConsumeToken.
// NOTE(review): presumably consumes the current token when it matches `kind`
// and reports whether it did -- confirm against the function body.
72 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
// Variadic overload of ConsumeToken accepting several candidate token kinds.
83 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
// Tests whether the current token is NOT any of the requested kinds.
87 if (!Peek().isOneOf(kinds...))
// Creates a Bookmark constructed from the current cursor index.
94 CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
95 return Bookmark(m_next_token_index);
// Returns the index of the token the cursor currently points at.
98 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
// Returns a reference to the token at the cursor without moving the cursor.
// Precondition (asserted): HasMoreTokens() is true.
100 clang::Token &CPlusPlusNameParser::Peek() {
101 assert(HasMoreTokens());
102 return m_tokens[m_next_token_index];
// Parses a function signature of the form `[return-type] name(args) [quals]`
// starting at the cursor.
// expect_return_type: when true, a type name is consumed before the name.
// The result carries the basename, context, argument text and qualifier text,
// each sliced out of the source text by token range.
105 Optional<ParsedFunction>
106 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
107 Bookmark start_position = SetBookmark();
108 if (expect_return_type) {
109 // Consume return type if it's expected.
110 if (!ConsumeTypename())
114 auto maybe_name = ParseFullNameImpl();
// Record the token indices that bracket the argument list and the trailing
// function qualifiers so their text can be extracted afterwards.
119 size_t argument_start = GetCurrentPosition();
120 if (!ConsumeArguments()) {
124 size_t qualifiers_start = GetCurrentPosition();
125 SkipFunctionQualifiers();
126 size_t end_position = GetCurrentPosition();
128 ParsedFunction result;
129 result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
130 result.name.context = GetTextForRange(maybe_name.getValue().context_range);
131 result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
132 result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
// NOTE(review): Remove() presumably keeps the consumed tokens rather than
// rewinding to the bookmark -- confirm against the Bookmark class.
133 start_position.Remove();
// Parses a function whose return type is a function pointer.
// expect_return_type: when true, a leading type name is consumed first.
// After the optional return type the parser requires '(' followed by
// pointer/reference tokens, then tries two shapes for what is inside the
// parentheses: a plain function, or (recursively) another function pointer.
137 Optional<ParsedFunction>
138 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
139 Bookmark start_position = SetBookmark();
140 if (expect_return_type) {
141 // Consume return type.
142 if (!ConsumeTypename())
146 if (!ConsumeToken(tok::l_paren))
148 if (!ConsumePtrsAndRefs())
// First shape: the inner declarator is an ordinary function, which must be
// followed by ')' and the argument list of the returned function type.
152 Bookmark before_inner_function_pos = SetBookmark();
153 auto maybe_inner_function_name = ParseFunctionImpl(false);
154 if (maybe_inner_function_name)
155 if (ConsumeToken(tok::r_paren))
156 if (ConsumeArguments()) {
157 SkipFunctionQualifiers();
158 start_position.Remove();
159 before_inner_function_pos.Remove();
160 return maybe_inner_function_name;
// Second shape: the inner declarator is itself a function returning a
// function pointer; recurse without expecting another return type.
164 auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
165 if (maybe_inner_function_ptr_name)
166 if (ConsumeToken(tok::r_paren))
167 if (ConsumeArguments()) {
168 SkipFunctionQualifiers();
169 start_position.Remove();
170 return maybe_inner_function_ptr_name;
// Consumes a parenthesized group '(' ... ')' by delegating to ConsumeBrackets.
175 bool CPlusPlusNameParser::ConsumeArguments() {
176 return ConsumeBrackets(tok::l_paren, tok::r_paren);
// Consumes a template argument list; the cursor must be at a '<' token.
// `template_counter` is the number of currently open template brackets and
// `can_open_template` records whether a '<' seen at the current position may
// start a nested template argument list.
179 bool CPlusPlusNameParser::ConsumeTemplateArgs() {
180 Bookmark start_position = SetBookmark();
181 if (!HasMoreTokens() || Peek().getKind() != tok::less)
185 // Consuming template arguments is a bit trickier than consuming function
186 // arguments, because '<' '>' brackets are not always trivially balanced.
187 // In some rare cases tokens '<' and '>' can appear inside template arguments
188 // as arithmetic or shift operators not as template brackets.
189 // Examples: std::enable_if<(10u)<(64), bool>
190 // f<A<operator<(X,Y)::Subclass>>
191 // Good thing that compiler makes sure that really ambiguous cases of
192 // '>' usage should be enclosed within '()' brackets.
193 int template_counter = 1;
194 bool can_open_template = false;
195 while (HasMoreTokens() && template_counter > 0) {
196 tok::TokenKind kind = Peek().getKind();
// A '>>' token closes two template brackets at once.
198 case tok::greatergreater:
199 template_counter -= 2;
200 can_open_template = false;
205 can_open_template = false;
209 // '<' is an attempt to open a subtemplate
210 // check if parser is at the point where it's actually possible,
211 // otherwise it's just a part of an expression like 'sizeof(T)<(10)'.
212 // No need to do the same for '>' because compiler actually makes sure
213 // that '>' always surrounded by brackets to avoid ambiguity.
214 if (can_open_template)
216 can_open_template = false;
// An operator name or an identifier may be followed by a nested '<'.
219 case tok::kw_operator: // C++ operator overloading.
220 if (!ConsumeOperator())
222 can_open_template = true;
224 case tok::raw_identifier:
225 can_open_template = true;
// Bracketed and parenthesized groups are skipped as balanced units.
229 if (!ConsumeBrackets(tok::l_square, tok::r_square))
231 can_open_template = false;
234 if (!ConsumeArguments())
236 can_open_template = false;
239 can_open_template = false;
245 assert(template_counter >= 0);
// A positive counter here means the token stream ended with brackets open.
246 if (template_counter > 0) {
249 start_position.Remove();
// Consumes the token sequence '(' anonymous namespace ')'. The word
// "anonymous" is matched as a raw identifier, `namespace` as a keyword.
253 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
254 Bookmark start_position = SetBookmark();
255 if (!ConsumeToken(tok::l_paren)) {
258 constexpr llvm::StringLiteral g_anonymous("anonymous");
259 if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
260 Peek().getRawIdentifier() == g_anonymous) {
266 if (!ConsumeToken(tok::kw_namespace)) {
270 if (!ConsumeToken(tok::r_paren)) {
273 start_position.Remove();
// Consumes a brace-delimited lambda marker: a '{' whose first token is the raw
// identifier "lambda", followed by the rest of the balanced '{' ... '}' group.
277 bool CPlusPlusNameParser::ConsumeLambda() {
278 Bookmark start_position = SetBookmark();
279 if (!ConsumeToken(tok::l_brace)) {
282 constexpr llvm::StringLiteral g_lambda("lambda");
283 if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
284 Peek().getRawIdentifier() == g_lambda) {
285 // Put the matched brace back so we can use ConsumeBrackets
291 if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
295 start_position.Remove();
// Consumes a balanced bracket group. The cursor must be at a `left` token.
// left / right: the opening and closing token kinds of the group.
// Nesting is tracked with `counter`, and the loop runs while it is positive
// and tokens remain.
299 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
300 tok::TokenKind right) {
301 Bookmark start_position = SetBookmark();
302 if (!HasMoreTokens() || Peek().getKind() != left)
307 while (HasMoreTokens() && counter > 0) {
308 tok::TokenKind kind = Peek().getKind();
311 else if (kind == left)
316 assert(counter >= 0);
320 start_position.Remove();
// Consumes an operator name: the `operator` keyword followed by what it names.
// The visible cases cover new/delete (optionally with '[' ']'), the operator
// tokens listed in clang's OperatorKinds.def, the call operator '(' ')', the
// subscript operator '[' ']', and a cast operator spelled as a type name.
324 bool CPlusPlusNameParser::ConsumeOperator() {
325 Bookmark start_position = SetBookmark();
326 if (!ConsumeToken(tok::kw_operator))
329 if (!HasMoreTokens()) {
333 const auto &token = Peek();
334 switch (token.getKind()) {
337 // This is 'new' or 'delete' operators.
339 // Check for array new/delete.
340 if (HasMoreTokens() && Peek().is(tok::l_square)) {
341 // Consume the '[' and ']'.
342 if (!ConsumeBrackets(tok::l_square, tok::r_square))
347 #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \
351 #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
352 #include "clang/Basic/OperatorKinds.def"
353 #undef OVERLOADED_OPERATOR
354 #undef OVERLOADED_OPERATOR_MULTI
357 // Call operator consume '(' ... ')'.
358 if (ConsumeBrackets(tok::l_paren, tok::r_paren))
363 // This is a [] operator.
364 // Consume the '[' and ']'.
365 if (ConsumeBrackets(tok::l_square, tok::r_square))
370 // This might be a cast operator.
371 if (ConsumeTypename())
375 start_position.Remove();
// Consumes every consecutive `const` / `volatile` token at the cursor.
379 void CPlusPlusNameParser::SkipTypeQualifiers() {
380 while (ConsumeToken(tok::kw_const, tok::kw_volatile))
// Consumes every consecutive trailing function qualifier at the cursor:
// `const`, `volatile`, '&' and '&&'.
384 void CPlusPlusNameParser::SkipFunctionQualifiers() {
385 while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
// Consumes a run of built-in type keywords starting at the cursor. The loop
// stops at the end of the token list or when `continue_parsing` is cleared.
389 bool CPlusPlusNameParser::ConsumeBuiltinType() {
391 bool continue_parsing = true;
392 // Built-in types can be made of a few keywords
393 // like 'unsigned long long int'. This function
394 // consumes all built-in type keywords without
395 // checking if they make sense like 'unsigned char void'.
396 while (continue_parsing && HasMoreTokens()) {
397 switch (Peek().getKind()) {
400 case tok::kw___int64:
401 case tok::kw___int128:
403 case tok::kw_unsigned:
410 case tok::kw___float128:
411 case tok::kw_wchar_t:
413 case tok::kw_char16_t:
414 case tok::kw_char32_t:
419 continue_parsing = false;
// Runs ConsumePtrsAndRefs; its result is not used by the visible call.
426 void CPlusPlusNameParser::SkipPtrsAndRefs() {
428 ConsumePtrsAndRefs();
// Consumes pointer/reference declarator tokens ('*', '&', '&&') together with
// the type qualifiers that may appear around them.
431 bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
433 SkipTypeQualifiers();
434 while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
437 SkipTypeQualifiers();
// Consumes a `decltype` keyword followed by its parenthesized operand.
442 bool CPlusPlusNameParser::ConsumeDecltype() {
443 Bookmark start_position = SetBookmark();
444 if (!ConsumeToken(tok::kw_decltype))
447 if (!ConsumeArguments())
450 start_position.Remove();
// Consumes a type name at the cursor. Leading cv-qualifiers are skipped, then
// the type is tried as a built-in type, a decltype expression, or -- when
// neither matches -- a full (possibly qualified) name.
454 bool CPlusPlusNameParser::ConsumeTypename() {
455 Bookmark start_position = SetBookmark();
456 SkipTypeQualifiers();
457 if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
458 if (!ParseFullNameImpl())
462 start_position.Remove();
// Parses a (possibly qualified) C++ name starting at the cursor, using a small
// state machine driven by the kind of the current token. It records where the
// last '::' was seen so the name can be split into a context range (everything
// before that '::') and a basename range (everything after it).
466 Optional<CPlusPlusNameParser::ParsedNameRanges>
467 CPlusPlusNameParser::ParseFullNameImpl() {
468 // Name parsing state machine.
470 Beginning, // start of the name
471 AfterTwoColons, // right after ::
472 AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
473 AfterTemplate, // right after template brackets (<something>)
474 AfterOperator, // right after name of C++ operator
477 Bookmark start_position = SetBookmark();
478 State state = State::Beginning;
479 bool continue_parsing = true;
480 Optional<size_t> last_coloncolon_position = None;
482 while (continue_parsing && HasMoreTokens()) {
483 const auto &token = Peek();
484 switch (token.getKind()) {
// An identifier is only valid at the start of the name or right after '::'.
485 case tok::raw_identifier: // Just a name.
486 if (state != State::Beginning && state != State::AfterTwoColons) {
487 continue_parsing = false;
491 state = State::AfterIdentifier;
494 if (state == State::Beginning || state == State::AfterTwoColons) {
495 // (anonymous namespace)
496 if (ConsumeAnonymousNamespace()) {
497 state = State::AfterIdentifier;
502 // Type declared inside a function 'func()::Type'
503 if (state != State::AfterIdentifier && state != State::AfterTemplate &&
504 state != State::AfterOperator) {
505 continue_parsing = false;
508 Bookmark l_paren_position = SetBookmark();
509 // Consume the '(' ... ') [const]'.
510 if (!ConsumeArguments()) {
511 continue_parsing = false;
514 SkipFunctionQualifiers();
// The argument list only belongs to the name when a '::' follows it.
517 size_t coloncolon_position = GetCurrentPosition();
518 if (!ConsumeToken(tok::coloncolon)) {
519 continue_parsing = false;
522 l_paren_position.Remove();
523 last_coloncolon_position = coloncolon_position;
524 state = State::AfterTwoColons;
// A lambda marker is treated like an identifier component of the name.
528 if (state == State::Beginning || state == State::AfterTwoColons) {
529 if (ConsumeLambda()) {
530 state = State::AfterIdentifier;
534 continue_parsing = false;
536 case tok::coloncolon: // Type nesting delimiter.
537 if (state != State::Beginning && state != State::AfterIdentifier &&
538 state != State::AfterTemplate) {
539 continue_parsing = false;
542 last_coloncolon_position = GetCurrentPosition();
544 state = State::AfterTwoColons;
546 case tok::less: // Template brackets.
547 if (state != State::AfterIdentifier && state != State::AfterOperator) {
548 continue_parsing = false;
551 if (!ConsumeTemplateArgs()) {
552 continue_parsing = false;
555 state = State::AfterTemplate;
557 case tok::kw_operator: // C++ operator overloading.
558 if (state != State::Beginning && state != State::AfterTwoColons) {
559 continue_parsing = false;
562 if (!ConsumeOperator()) {
563 continue_parsing = false;
566 state = State::AfterOperator;
568 case tok::tilde: // Destructor.
569 if (state != State::Beginning && state != State::AfterTwoColons) {
570 continue_parsing = false;
// The '~' must be followed by an identifier to form a destructor name.
574 if (ConsumeToken(tok::raw_identifier)) {
575 state = State::AfterIdentifier;
578 continue_parsing = false;
582 continue_parsing = false;
// A name is only produced when parsing stopped after an identifier, an
// operator name or a template argument list.
587 if (state == State::AfterIdentifier || state == State::AfterOperator ||
588 state == State::AfterTemplate) {
589 ParsedNameRanges result;
590 if (last_coloncolon_position) {
// Context: tokens from the start up to (not including) the last '::'.
591 result.context_range = Range(start_position.GetSavedPosition(),
592 last_coloncolon_position.getValue());
// Basename: tokens after the last '::' up to the current cursor.
593 result.basename_range =
594 Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
// Without any '::' the whole parsed span is the basename.
596 result.basename_range =
597 Range(start_position.GetSavedPosition(), GetCurrentPosition());
599 start_position.Remove();
// Returns the slice of m_text covered by the tokens in `range`.
// range: half-open token index range; the asserts require begin < end,
// begin inside m_tokens and end not past its size.
606 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
608 return llvm::StringRef();
609 assert(range.begin_index < range.end_index);
610 assert(range.begin_index < m_tokens.size());
611 assert(range.end_index <= m_tokens.size());
612 clang::Token &first_token = m_tokens[range.begin_index];
613 clang::Token &last_token = m_tokens[range.end_index - 1];
614 clang::SourceLocation start_loc = first_token.getLocation();
615 clang::SourceLocation end_loc = last_token.getLocation();
// The raw location encodings are used directly as character offsets into
// m_text; the end offset is extended by the length of the last token.
616 unsigned start_pos = start_loc.getRawEncoding();
617 unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
618 return m_text.take_front(end_pos).drop_front(start_pos);
// Returns the language options shared by the lexer. The function-local static
// is configured once via llvm::call_once, enabling line comments, C99/C11 and
// C++ up to C++17.
621 static const clang::LangOptions &GetLangOptions() {
622 static clang::LangOptions g_options;
623 static llvm::once_flag g_once_flag;
624 llvm::call_once(g_once_flag, []() {
625 g_options.LineComment = true;
626 g_options.C99 = true;
627 g_options.C11 = true;
628 g_options.CPlusPlus = true;
629 g_options.CPlusPlus11 = true;
630 g_options.CPlusPlus14 = true;
631 g_options.CPlusPlus17 = true;
// Returns a function-local static map from keyword spelling to its token kind.
// The entries are generated by expanding the KEYWORD macro over clang's
// TokenKinds.def, mapping each `Name` to `tok::kw_Name`.
636 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
637 static llvm::StringMap<tok::TokenKind> g_map{
638 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
639 #include "clang/Basic/TokenKinds.def"
// Lexes m_text with clang's raw lexer and appends the tokens to m_tokens.
// Raw identifiers that appear in the keyword map are re-tagged with the
// matching keyword token kind before being stored.
645 void CPlusPlusNameParser::ExtractTokens() {
646 clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
647 m_text.data(), m_text.data() + m_text.size());
648 const auto &kw_map = GetKeywordsMap();
// Lex token by token until the end-of-file token is produced.
650 for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
651 lexer.LexFromRawLexer(token)) {
652 if (token.is(clang::tok::raw_identifier)) {
653 auto it = kw_map.find(token.getRawIdentifier());
654 if (it != kw_map.end()) {
655 token.setKind(it->getValue());
659 m_tokens.push_back(token);