1 //===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "CPlusPlusNameParser.h"
11 #include "clang/Basic/IdentifierTable.h"
12 #include "llvm/ADT/StringMap.h"
13 #include "llvm/Support/Threading.h"
16 using namespace lldb_private;
19 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
20 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
21 namespace tok = clang::tok;
// Entry point for parsing the token stream as a function definition.
// The cursor is rewound to the first token and three shapes are attempted in
// this order: a function without a return type, a function whose return type
// is a function pointer, and a function with an ordinary return type.
23 Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
24 m_next_token_index = 0;
25 Optional<ParsedFunction> result(None);
27 // Try to parse the name as function without a return type specified e.g.
30 Bookmark start_position = SetBookmark();
31 result = ParseFunctionImpl(false);
// True only when the parse succeeded and no tokens are left over.
32 if (result && !HasMoreTokens())
36 // Try to parse the name as function with function pointer return type e.g.
37 // void (*get_func(const char*))()
38 result = ParseFuncPtr(true);
42 // Finally try to parse the name as a function with non-function return type
43 // e.g. int main(int, char*[])
44 result = ParseFunctionImpl(true);
// Entry point for parsing the token stream as a (possibly qualified) name.
// Rewinds the cursor, delegates to ParseFullNameImpl() and converts the
// resulting token ranges into text for the basename and the context.
50 Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
51 m_next_token_index = 0;
52 Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
58 result.basename = GetTextForRange(name_ranges.getValue().basename_range);
59 result.context = GetTextForRange(name_ranges.getValue().context_range);
// Returns true while the cursor index is still inside m_tokens.
63 bool CPlusPlusNameParser::HasMoreTokens() {
64 return m_next_token_index < m_tokens.size();
// Moves the cursor forward by one token. No bounds check is performed here.
67 void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
// Moves the cursor back by one token. No bounds check is performed here, so
// the caller must have advanced at least once beforehand.
69 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
// Single-kind overload of ConsumeToken.
// NOTE(review): presumably consumes the current token only when it has the
// given kind and reports whether it did -- confirm against the body.
71 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
// Variadic overload of ConsumeToken: the current token is tested against the
// whole set of kinds at once through clang::Token::isOneOf.
// NOTE(review): presumably the token is consumed only on a match -- confirm.
82 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
86 if (!Peek().isOneOf(kinds...))
// Creates a Bookmark bound to the current cursor index. Callers in this file
// call Remove() on the bookmark once a parse step has succeeded.
// NOTE(review): presumably a bookmark that is not removed rewinds the cursor
// when it goes out of scope -- confirm against the Bookmark class.
93 CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
94 return Bookmark(m_next_token_index);
// Returns the index of the token the cursor currently points at.
97 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
// Returns a reference to the token under the cursor without moving the
// cursor. Calling this with no tokens left trips the assertion.
99 clang::Token &CPlusPlusNameParser::Peek() {
100 assert(HasMoreTokens());
101 return m_tokens[m_next_token_index];
// Parses `[return-type] full-name (arguments) [qualifiers]` starting at the
// cursor.
//
// expect_return_type: when true, a type name must precede the function name.
//
// On success the result carries four pieces of text: the basename, its
// context, the argument list and the trailing qualifiers.
104 Optional<ParsedFunction>
105 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
106 Bookmark start_position = SetBookmark();
107 if (expect_return_type) {
108 // Consume return type if it's expected.
109 if (!ConsumeTypename())
113 auto maybe_name = ParseFullNameImpl();
// Token positions are recorded around each piece so that the pieces can be
// cut out of the original text afterwards.
118 size_t argument_start = GetCurrentPosition();
119 if (!ConsumeArguments()) {
123 size_t qualifiers_start = GetCurrentPosition();
124 SkipFunctionQualifiers();
125 size_t end_position = GetCurrentPosition();
127 ParsedFunction result;
128 result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
129 result.name.context = GetTextForRange(maybe_name.getValue().context_range);
130 result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
131 result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
132 start_position.Remove();
// Parses a function whose return type is a function pointer, e.g.
// `void (*get_func(const char*))()`.
//
// expect_return_type: when true, a type name must come before the opening
// '('. The recursive call below passes false.
//
// After '(' and the pointer/reference tokens, the inner part is tried first as
// a plain function and then, recursively, as another function-pointer
// returning function. In both cases it must be followed by ')' and an
// argument list, and the inner function's parse result is what is returned.
136 Optional<ParsedFunction>
137 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
138 Bookmark start_position = SetBookmark();
139 if (expect_return_type) {
140 // Consume return type.
141 if (!ConsumeTypename())
145 if (!ConsumeToken(tok::l_paren))
147 if (!ConsumePtrsAndRefs())
// First alternative: the inner part is an ordinary function.
151 Bookmark before_inner_function_pos = SetBookmark();
152 auto maybe_inner_function_name = ParseFunctionImpl(false);
153 if (maybe_inner_function_name)
154 if (ConsumeToken(tok::r_paren))
155 if (ConsumeArguments()) {
156 SkipFunctionQualifiers();
157 start_position.Remove();
158 before_inner_function_pos.Remove();
159 return maybe_inner_function_name;
// Second alternative: the inner part itself returns a function pointer.
163 auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
164 if (maybe_inner_function_ptr_name)
165 if (ConsumeToken(tok::r_paren))
166 if (ConsumeArguments()) {
167 SkipFunctionQualifiers();
168 start_position.Remove();
169 return maybe_inner_function_ptr_name;
// Consumes a parenthesised group `( ... )` by delegating to ConsumeBrackets
// with the round bracket token kinds, and returns its result.
174 bool CPlusPlusNameParser::ConsumeArguments() {
175 return ConsumeBrackets(tok::l_paren, tok::r_paren);
// Consumes a template argument list `< ... >` starting at the cursor.
// The cursor must be on a '<' token. Nesting depth is tracked in
// template_counter, and the list is only considered well formed when the
// counter ends at exactly zero. can_open_template records whether the token
// just seen allows a following '<' to start a nested template.
178 bool CPlusPlusNameParser::ConsumeTemplateArgs() {
179 Bookmark start_position = SetBookmark();
180 if (!HasMoreTokens() || Peek().getKind() != tok::less)
184 // Consuming template arguments is a bit trickier than consuming function
185 // arguments, because '<' '>' brackets are not always trivially balanced. In
186 // some rare cases tokens '<' and '>' can appear inside template arguments as
187 // arithmetic or shift operators, not as template brackets. Examples:
188 // std::enable_if<(10u)<(64), bool>
189 // f<A<operator<(X,Y)::Subclass>>
190 // Fortunately the compiler makes sure that really ambiguous uses of '>'
191 // are enclosed within '()' brackets.
192 int template_counter = 1;
193 bool can_open_template = false;
194 while (HasMoreTokens() && template_counter > 0) {
195 tok::TokenKind kind = Peek().getKind();
// '>>' is lexed as a single token but closes two template levels.
197 case tok::greatergreater:
198 template_counter -= 2;
199 can_open_template = false;
204 can_open_template = false;
208 // '<' is an attempt to open a subtemplate;
209 // check if the parser is at a point where that is actually possible,
210 // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
211 // need to do the same for '>' because the compiler actually makes sure that
212 // '>' is always surrounded by brackets to avoid ambiguity.
213 if (can_open_template)
215 can_open_template = false;
218 case tok::kw_operator: // C++ operator overloading.
219 if (!ConsumeOperator())
221 can_open_template = true;
223 case tok::raw_identifier:
224 can_open_template = true;
228 if (!ConsumeBrackets(tok::l_square, tok::r_square))
230 can_open_template = false;
233 if (!ConsumeArguments())
235 can_open_template = false;
238 can_open_template = false;
// A non-zero counter means the brackets did not balance exactly, e.g. the
// tokens ran out early or a '>>' closed one level too many.
244 if (template_counter != 0) {
247 start_position.Remove();
// Consumes the four-token sequence `(anonymous namespace)`: an opening
// parenthesis, the identifier `anonymous`, the keyword `namespace` and a
// closing parenthesis.
251 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
252 Bookmark start_position = SetBookmark();
253 if (!ConsumeToken(tok::l_paren)) {
256 constexpr llvm::StringLiteral g_anonymous("anonymous");
// `anonymous` is not a keyword, so it is matched by its spelling.
257 if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
258 Peek().getRawIdentifier() == g_anonymous) {
264 if (!ConsumeToken(tok::kw_namespace)) {
268 if (!ConsumeToken(tok::r_paren)) {
271 start_position.Remove();
// Consumes a brace-enclosed group whose first inner token is the identifier
// `lambda`. Once that identifier has been recognised, the whole group is
// consumed as a balanced `{ ... }` via ConsumeBrackets.
275 bool CPlusPlusNameParser::ConsumeLambda() {
276 Bookmark start_position = SetBookmark();
277 if (!ConsumeToken(tok::l_brace)) {
280 constexpr llvm::StringLiteral g_lambda("lambda");
281 if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
282 Peek().getRawIdentifier() == g_lambda) {
283 // Put the matched brace back so we can use ConsumeBrackets
289 if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
293 start_position.Remove();
// Consumes a bracketed group delimited by the given token kinds.
//
// left:  token kind that opens the group; the cursor must be on such a token.
// right: token kind that closes the group.
//
// The loop runs while tokens remain and `counter` is positive, so `counter`
// appears to hold the current nesting depth.
// NOTE(review): presumably `left` increments and `right` decrements the
// counter -- confirm against the loop body.
297 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
298 tok::TokenKind right) {
299 Bookmark start_position = SetBookmark();
300 if (!HasMoreTokens() || Peek().getKind() != left)
305 while (HasMoreTokens() && counter > 0) {
306 tok::TokenKind kind = Peek().getKind();
309 else if (kind == left)
314 assert(counter >= 0);
318 start_position.Remove();
// Consumes the name of an overloaded operator: the `operator` keyword followed
// by the operator itself. The forms handled below are new/delete (optionally
// with `[]`), the operators listed in clang's OperatorKinds.def, the call
// operator `()`, the subscript operator `[]`, and conversion operators, whose
// target is parsed as a type name.
322 bool CPlusPlusNameParser::ConsumeOperator() {
323 Bookmark start_position = SetBookmark();
324 if (!ConsumeToken(tok::kw_operator))
327 if (!HasMoreTokens()) {
331 const auto &token = Peek();
332 switch (token.getKind()) {
335 // This is 'new' or 'delete' operators.
337 // Check for array new/delete.
338 if (HasMoreTokens() && Peek().is(tok::l_square)) {
339 // Consume the '[' and ']'.
340 if (!ConsumeBrackets(tok::l_square, tok::r_square))
// NOTE(review): presumably the macro below expands to one case per
// single-token operator, while multi-token operators expand to nothing here
// and are handled by hand further down -- confirm against the macro body.
345 #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \
349 #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
350 #include "clang/Basic/OperatorKinds.def"
351 #undef OVERLOADED_OPERATOR
352 #undef OVERLOADED_OPERATOR_MULTI
355 // Call operator: consume '(' ... ')'.
356 if (ConsumeBrackets(tok::l_paren, tok::r_paren))
361 // This is a [] operator.
362 // Consume the '[' and ']'.
363 if (ConsumeBrackets(tok::l_square, tok::r_square))
368 // This might be a cast operator.
369 if (ConsumeTypename())
373 start_position.Remove();
// Consumes any run of `const` / `volatile` tokens at the cursor. Finding none
// is not an error.
377 void CPlusPlusNameParser::SkipTypeQualifiers() {
378 while (ConsumeToken(tok::kw_const, tok::kw_volatile))
// Consumes any run of trailing function qualifiers at the cursor: `const`,
// `volatile`, and the ref-qualifiers `&` and `&&`. Finding none is not an
// error.
382 void CPlusPlusNameParser::SkipFunctionQualifiers() {
383 while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
// Consumes a run of consecutive built-in type keywords at the cursor. The loop
// ends at the first token that is not one of the listed keywords, or when the
// tokens run out.
387 bool CPlusPlusNameParser::ConsumeBuiltinType() {
389 bool continue_parsing = true;
390 // Built-in types can be made of a few keywords like 'unsigned long long
391 // int'. This function consumes all built-in type keywords without checking
392 // if they make sense like 'unsigned char void'.
393 while (continue_parsing && HasMoreTokens()) {
394 switch (Peek().getKind()) {
397 case tok::kw___int64:
398 case tok::kw___int128:
400 case tok::kw_unsigned:
407 case tok::kw___float128:
408 case tok::kw_wchar_t:
410 case tok::kw_char16_t:
411 case tok::kw_char32_t:
416 continue_parsing = false;
// Same as ConsumePtrsAndRefs() but the result is deliberately discarded, so
// the absence of pointer/reference tokens is not treated as a failure.
423 void CPlusPlusNameParser::SkipPtrsAndRefs() {
425 ConsumePtrsAndRefs();
// Consumes pointer and reference declarator tokens (`*`, `&`, `&&`) together
// with any cv-qualifiers mixed in between them.
// NOTE(review): presumably returns whether at least one such token was
// consumed -- confirm against the body.
428 bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
430 SkipTypeQualifiers();
431 while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
434 SkipTypeQualifiers();
// Consumes a `decltype( ... )` specifier: the `decltype` keyword followed by a
// balanced parenthesised group.
439 bool CPlusPlusNameParser::ConsumeDecltype() {
440 Bookmark start_position = SetBookmark();
441 if (!ConsumeToken(tok::kw_decltype))
444 if (!ConsumeArguments())
447 start_position.Remove();
// Consumes a type name at the cursor. Leading cv-qualifiers are skipped first.
// The type itself is tried as a built-in type, then as a decltype specifier,
// and finally as a (possibly qualified) user-defined name.
451 bool CPlusPlusNameParser::ConsumeTypename() {
452 Bookmark start_position = SetBookmark();
453 SkipTypeQualifiers();
454 if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
455 if (!ParseFullNameImpl())
459 start_position.Remove();
// Parses a (possibly qualified) C++ name starting at the cursor and returns
// the token ranges of its context and basename.
//
// The tokens are walked with a small state machine. Each token kind is only
// legal in certain states, and the first token that does not fit ends the
// parse. The position of the most recent '::' is remembered so the name can be
// split into context (before it) and basename (after it). A result is only
// produced when the walk ends right after an identifier, an operator name or
// a template argument list.
463 Optional<CPlusPlusNameParser::ParsedNameRanges>
464 CPlusPlusNameParser::ParseFullNameImpl() {
465 // Name parsing state machine.
467 Beginning, // start of the name
468 AfterTwoColons, // right after ::
469 AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
470 AfterTemplate, // right after template brackets (<something>)
471 AfterOperator, // right after name of C++ operator
474 Bookmark start_position = SetBookmark();
475 State state = State::Beginning;
476 bool continue_parsing = true;
477 Optional<size_t> last_coloncolon_position = None;
479 while (continue_parsing && HasMoreTokens()) {
480 const auto &token = Peek();
481 switch (token.getKind()) {
482 case tok::raw_identifier: // Just a name.
483 if (state != State::Beginning && state != State::AfterTwoColons) {
484 continue_parsing = false;
488 state = State::AfterIdentifier;
491 if (state == State::Beginning || state == State::AfterTwoColons) {
492 // (anonymous namespace)
493 if (ConsumeAnonymousNamespace()) {
494 state = State::AfterIdentifier;
499 // Type declared inside a function 'func()::Type'
500 if (state != State::AfterIdentifier && state != State::AfterTemplate &&
501 state != State::AfterOperator) {
502 continue_parsing = false;
505 Bookmark l_paren_position = SetBookmark();
506 // Consume the '(' ... ') [const]'.
507 if (!ConsumeArguments()) {
508 continue_parsing = false;
511 SkipFunctionQualifiers();
// The argument list only belongs to the name when a '::' follows it.
514 size_t coloncolon_position = GetCurrentPosition();
515 if (!ConsumeToken(tok::coloncolon)) {
516 continue_parsing = false;
519 l_paren_position.Remove();
520 last_coloncolon_position = coloncolon_position;
521 state = State::AfterTwoColons;
// A lambda component is treated like an identifier.
525 if (state == State::Beginning || state == State::AfterTwoColons) {
526 if (ConsumeLambda()) {
527 state = State::AfterIdentifier;
531 continue_parsing = false;
533 case tok::coloncolon: // Type nesting delimiter.
534 if (state != State::Beginning && state != State::AfterIdentifier &&
535 state != State::AfterTemplate) {
536 continue_parsing = false;
539 last_coloncolon_position = GetCurrentPosition();
541 state = State::AfterTwoColons;
543 case tok::less: // Template brackets.
544 if (state != State::AfterIdentifier && state != State::AfterOperator) {
545 continue_parsing = false;
548 if (!ConsumeTemplateArgs()) {
549 continue_parsing = false;
552 state = State::AfterTemplate;
554 case tok::kw_operator: // C++ operator overloading.
555 if (state != State::Beginning && state != State::AfterTwoColons) {
556 continue_parsing = false;
559 if (!ConsumeOperator()) {
560 continue_parsing = false;
563 state = State::AfterOperator;
565 case tok::tilde: // Destructor.
566 if (state != State::Beginning && state != State::AfterTwoColons) {
567 continue_parsing = false;
571 if (ConsumeToken(tok::raw_identifier)) {
572 state = State::AfterIdentifier;
575 continue_parsing = false;
579 continue_parsing = false;
// Only these three states correspond to a complete name.
584 if (state == State::AfterIdentifier || state == State::AfterOperator ||
585 state == State::AfterTemplate) {
586 ParsedNameRanges result;
// Context is everything before the last '::'. The basename starts one token
// past it, because '::' is a single token.
587 if (last_coloncolon_position) {
588 result.context_range = Range(start_position.GetSavedPosition(),
589 last_coloncolon_position.getValue());
590 result.basename_range =
591 Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
// No '::' was seen: the whole parsed span is the basename.
593 result.basename_range =
594 Range(start_position.GetSavedPosition(), GetCurrentPosition());
596 start_position.Remove();
// Returns the slice of m_text covered by the tokens in the half-open index
// range [begin_index, end_index). The returned StringRef points into m_text.
//
// The raw encoding of each token's source location is used directly as a byte
// offset into m_text.
// NOTE(review): this appears to rely on the lexer having been started at a
// default-constructed SourceLocation, as ExtractTokens does -- confirm.
603 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
605 return llvm::StringRef();
606 assert(range.begin_index < range.end_index);
607 assert(range.begin_index < m_tokens.size());
608 assert(range.end_index <= m_tokens.size());
609 clang::Token &first_token = m_tokens[range.begin_index];
610 clang::Token &last_token = m_tokens[range.end_index - 1];
611 clang::SourceLocation start_loc = first_token.getLocation();
612 clang::SourceLocation end_loc = last_token.getLocation();
613 unsigned start_pos = start_loc.getRawEncoding();
// The end offset is the start of the last token plus its length.
614 unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
615 return m_text.take_front(end_pos).drop_front(start_pos);
// Returns the language options used for lexing names. The options object is a
// function-local static that is configured exactly once through
// llvm::call_once; the C and C++ dialect flags set below are all enabled.
618 static const clang::LangOptions &GetLangOptions() {
619 static clang::LangOptions g_options;
620 static llvm::once_flag g_once_flag;
621 llvm::call_once(g_once_flag, []() {
622 g_options.LineComment = true;
623 g_options.C99 = true;
624 g_options.C11 = true;
625 g_options.CPlusPlus = true;
626 g_options.CPlusPlus11 = true;
627 g_options.CPlusPlus14 = true;
628 g_options.CPlusPlus17 = true;
// Returns a map from keyword spelling to its tok::kw_* token kind. The map is
// a function-local static whose entries are generated by expanding the KEYWORD
// macro over clang's TokenKinds.def.
633 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
634 static llvm::StringMap<tok::TokenKind> g_map{
635 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
636 #include "clang/Basic/TokenKinds.def"
// Tokenizes m_text with clang's raw lexer and appends the tokens to m_tokens.
// Lexing stops at the end-of-file token, which is not stored.
642 void CPlusPlusNameParser::ExtractTokens() {
// The lexer starts at a default-constructed SourceLocation and runs over the
// whole of m_text.
645 clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
646 m_text.data(), m_text.data() + m_text.size());
647 const auto &kw_map = GetKeywordsMap();
649 for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
650 lexer.LexFromRawLexer(token)) {
// Raw identifiers whose spelling is a keyword are retagged with the keyword's
// token kind, so the parser can match on tok::kw_* kinds.
651 if (token.is(clang::tok::raw_identifier)) {
652 auto it = kw_map.find(token.getRawIdentifier());
653 if (it != kw_map.end()) {
654 token.setKind(it->getValue());
658 m_tokens.push_back(token);