1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 #include "clang/Tooling/Syntax/BuildTree.h"
9 #include "clang/AST/RecursiveASTVisitor.h"
10 #include "clang/AST/Stmt.h"
11 #include "clang/Basic/LLVM.h"
12 #include "clang/Basic/SourceLocation.h"
13 #include "clang/Basic/SourceManager.h"
14 #include "clang/Basic/TokenKinds.h"
15 #include "clang/Lex/Lexer.h"
16 #include "clang/Tooling/Syntax/Nodes.h"
17 #include "clang/Tooling/Syntax/Tokens.h"
18 #include "clang/Tooling/Syntax/Tree.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/Support/Allocator.h"
23 #include "llvm/Support/Casting.h"
24 #include "llvm/Support/FormatVariadic.h"
25 #include "llvm/Support/raw_ostream.h"
28 using namespace clang;
30 /// A helper class for constructing the syntax tree while traversing a clang
33 /// At each point of the traversal we maintain a list of pending nodes.
34 /// Initially all tokens are added as pending nodes. When processing a clang AST
35 /// node, the clients need to:
36 /// - create a corresponding syntax node,
37 /// - assign roles to all pending child nodes with 'markChild' and
39 /// - replace the child nodes with the new syntax node in the pending list
42 /// Note that all children are expected to be processed when building a node.
44 /// Call finalize() to finish building the tree and consume the root node.
45 class syntax::TreeBuilder {
47 TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {}
49 llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); }
51 /// Populate children for \p New node, assuming it covers tokens from \p
53 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New);
55 /// Set role for a token starting at \p Loc.
56 void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R);
58 /// Finish building the tree and consume the root node.
59 syntax::TranslationUnit *finalize() && {
60 auto Tokens = Arena.tokenBuffer().expandedTokens();
61 // Build the root of the tree, consuming all the children.
62 Pending.foldChildren(Tokens,
63 new (Arena.allocator()) syntax::TranslationUnit);
65 return cast<syntax::TranslationUnit>(std::move(Pending).finalize());
68 /// getRange() finds the syntax tokens corresponding to the passed source
70 /// \p First is the start position of the first token and \p Last is the start
71 /// position of the last token.
72 llvm::ArrayRef<syntax::Token> getRange(SourceLocation First,
73 SourceLocation Last) const {
74 assert(First.isValid());
75 assert(Last.isValid());
76 assert(First == Last ||
77 Arena.sourceManager().isBeforeInTranslationUnit(First, Last));
78 return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
80 llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const {
81 return getRange(D->getBeginLoc(), D->getEndLoc());
83 llvm::ArrayRef<syntax::Token> getRange(const Stmt *S) const {
84 return getRange(S->getBeginLoc(), S->getEndLoc());
88 /// Finds a token starting at \p L. The token must exist.
89 const syntax::Token *findToken(SourceLocation L) const;
91 /// A collection of trees covering the input tokens.
92 /// When created, each tree corresponds to a single token in the file.
93 /// Clients call 'foldChildren' to attach one or more subtrees to a parent
94 /// node and update the list of trees accordingly.
96 /// Ensures that added nodes properly nest and cover the whole token stream.
98 Forest(syntax::Arena &A) {
99 // FIXME: do not add 'eof' to the tree.
101 // Create all leaf nodes.
102 for (auto &T : A.tokenBuffer().expandedTokens())
103 Trees.insert(Trees.end(),
104 {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}});
107 void assignRole(llvm::ArrayRef<syntax::Token> Range,
108 syntax::NodeRole Role) {
109 assert(!Range.empty());
110 auto It = Trees.lower_bound(Range.begin());
111 assert(It != Trees.end() && "no node found");
112 assert(It->first == Range.begin() && "no child with the specified range");
113 assert((std::next(It) == Trees.end() ||
114 std::next(It)->first == Range.end()) &&
115 "no child with the specified range");
116 It->second.Role = Role;
119 /// Add \p Node to the forest and fill its children nodes based on the \p
121 void foldChildren(llvm::ArrayRef<syntax::Token> NodeTokens,
122 syntax::Tree *Node) {
123 assert(!NodeTokens.empty());
124 assert(Node->firstChild() == nullptr && "node already has children");
126 auto *FirstToken = NodeTokens.begin();
127 auto BeginChildren = Trees.lower_bound(FirstToken);
128 assert(BeginChildren != Trees.end() &&
129 BeginChildren->first == FirstToken &&
130 "fold crosses boundaries of existing subtrees");
131 auto EndChildren = Trees.lower_bound(NodeTokens.end());
132 assert((EndChildren == Trees.end() ||
133 EndChildren->first == NodeTokens.end()) &&
134 "fold crosses boundaries of existing subtrees");
136 // (!) we need to go in reverse order, because we can only prepend.
137 for (auto It = EndChildren; It != BeginChildren; --It)
138 Node->prependChildLowLevel(std::prev(It)->second.Node,
139 std::prev(It)->second.Role);
141 Trees.erase(BeginChildren, EndChildren);
142 Trees.insert({FirstToken, NodeAndRole(Node)});
145 // EXPECTS: all tokens were consumed and are owned by a single root node.
146 syntax::Node *finalize() && {
147 assert(Trees.size() == 1);
148 auto *Root = Trees.begin()->second.Node;
153 std::string str(const syntax::Arena &A) const {
155 for (auto It = Trees.begin(); It != Trees.end(); ++It) {
156 unsigned CoveredTokens =
158 ? (std::next(It)->first - It->first)
159 : A.tokenBuffer().expandedTokens().end() - It->first;
161 R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n",
162 It->second.Node->kind(),
163 It->first->text(A.sourceManager()), CoveredTokens);
164 R += It->second.Node->dump(A);
170 /// A with a role that should be assigned to it when adding to a parent.
172 explicit NodeAndRole(syntax::Node *Node)
173 : Node(Node), Role(NodeRole::Unknown) {}
179 /// Maps from the start token to a subtree starting at that token.
180 /// FIXME: storing the end tokens is redundant.
181 /// FIXME: the key of a map is redundant, it is also stored in NodeForRange.
182 std::map<const syntax::Token *, NodeAndRole> Trees;
185 /// For debugging purposes.
186 std::string str() { return Pending.str(Arena); }
188 syntax::Arena &Arena;
193 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
195 explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder)
196 : Builder(Builder), LangOpts(Ctx.getLangOpts()) {}
198 bool shouldTraversePostOrder() const { return true; }
200 bool TraverseDecl(Decl *D) {
201 if (!D || isa<TranslationUnitDecl>(D))
202 return RecursiveASTVisitor::TraverseDecl(D);
203 if (!llvm::isa<TranslationUnitDecl>(D->getDeclContext()))
204 return true; // Only build top-level decls for now, do not recurse.
205 return RecursiveASTVisitor::TraverseDecl(D);
208 bool VisitDecl(Decl *D) {
209 assert(llvm::isa<TranslationUnitDecl>(D->getDeclContext()) &&
210 "expected a top-level decl");
211 assert(!D->isImplicit());
212 Builder.foldNode(Builder.getRange(D),
213 new (allocator()) syntax::TopLevelDeclaration());
217 bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) {
218 // (!) we do not want to call VisitDecl(), the declaration for translation
219 // unit is built by finalize().
223 bool WalkUpFromCompoundStmt(CompoundStmt *S) {
224 using NodeRole = syntax::NodeRole;
226 Builder.markChildToken(S->getLBracLoc(), tok::l_brace,
227 NodeRole::CompoundStatement_lbrace);
228 Builder.markChildToken(S->getRBracLoc(), tok::r_brace,
229 NodeRole::CompoundStatement_rbrace);
231 Builder.foldNode(Builder.getRange(S),
232 new (allocator()) syntax::CompoundStatement);
237 /// A small helper to save some typing.
238 llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
240 syntax::TreeBuilder &Builder;
241 const LangOptions &LangOpts;
245 void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range,
247 Pending.foldChildren(Range, New);
250 void syntax::TreeBuilder::markChildToken(SourceLocation Loc,
251 tok::TokenKind Kind, NodeRole Role) {
254 Pending.assignRole(*findToken(Loc), Role);
257 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
258 auto Tokens = Arena.tokenBuffer().expandedTokens();
259 auto &SM = Arena.sourceManager();
260 auto It = llvm::partition_point(Tokens, [&](const syntax::Token &T) {
261 return SM.isBeforeInTranslationUnit(T.location(), L);
263 assert(It != Tokens.end());
264 assert(It->location() == L);
268 syntax::TranslationUnit *
269 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) {
270 TreeBuilder Builder(A);
271 BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext());
272 return std::move(Builder).finalize();