1 //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // Defines the basic structure of the syntax tree. There are two kinds of nodes:
9 // - leaf nodes correspond to a token in the expanded token stream,
10 // - tree nodes correspond to language grammar constructs.
12 // The tree is initially built from an AST. Each node of a newly built tree
13 // covers a continous subrange of expanded tokens (i.e. tokens after
14 // preprocessing), the specific tokens coverered are stored in the leaf nodes of
15 // a tree. A post-order traversal of a tree will visit leaf nodes in an order
16 // corresponding the original order of expanded tokens.
18 // This is still work in progress and highly experimental, we leave room for
19 // ourselves to completely change the design and/or implementation.
20 //===----------------------------------------------------------------------===//
21 #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
22 #define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
24 #include "clang/Basic/LangOptions.h"
25 #include "clang/Basic/SourceLocation.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/Basic/TokenKinds.h"
28 #include "clang/Tooling/Syntax/Tokens.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/Support/Allocator.h"
37 /// A memory arena for syntax trees. Also tracks the underlying token buffers,
38 /// source manager, etc.
41 Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
44 const SourceManager &sourceManager() const { return SourceMgr; }
45 const LangOptions &langOptions() const { return LangOpts; }
47 const TokenBuffer &tokenBuffer() const;
48 llvm::BumpPtrAllocator &allocator() { return Allocator; }
50 /// Add \p Buffer to the underlying source manager, tokenize it and store the
51 /// resulting tokens. Useful when there is a need to materialize tokens that
52 /// were not written in user code.
53 std::pair<FileID, llvm::ArrayRef<syntax::Token>>
54 lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
57 SourceManager &SourceMgr;
58 const LangOptions &LangOpts;
60 /// IDs and storage for additional tokenized files.
61 llvm::DenseMap<FileID, std::vector<syntax::Token>> ExtraTokens;
62 /// Keeps all the allocated nodes and their intermediate data structures.
63 llvm::BumpPtrAllocator Allocator;
68 enum class NodeKind : uint16_t;
69 enum class NodeRole : uint8_t;
71 /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
72 /// a Tree (representing language constructrs).
75 /// Newly created nodes are detached from a tree, parent and sibling links are
76 /// set when the node is added as a child to another one.
79 NodeKind kind() const { return static_cast<NodeKind>(Kind); }
80 NodeRole role() const { return static_cast<NodeRole>(Role); }
82 const Tree *parent() const { return Parent; }
83 Tree *parent() { return Parent; }
85 const Node *nextSibling() const { return NextSibling; }
86 Node *nextSibling() { return NextSibling; }
88 /// Dumps the structure of a subtree. For debugging and testing purposes.
89 std::string dump(const Arena &A) const;
90 /// Dumps the tokens forming this subtree.
91 std::string dumpTokens(const Arena &A) const;
94 // Tree is allowed to change the Parent link and Role.
103 /// A leaf node points to a single token inside the expanded token stream.
104 class Leaf final : public Node {
106 Leaf(const syntax::Token *T);
107 static bool classof(const Node *N);
109 const syntax::Token *token() const { return Tok; }
112 const syntax::Token *Tok;
115 /// A node that has children and represents a syntactic language construct.
116 class Tree : public Node {
119 static bool classof(const Node *N);
121 Node *firstChild() { return FirstChild; }
122 const Node *firstChild() const { return FirstChild; }
125 /// Find the first node with a corresponding role.
126 syntax::Node *findChild(NodeRole R);
129 /// Prepend \p Child to the list of children and and sets the parent pointer.
130 /// A very low-level operation that does not check any invariants, only used
132 /// EXPECTS: Role != NodeRoleDetached.
133 void prependChildLowLevel(Node *Child, NodeRole Role);
134 friend class TreeBuilder;
136 Node *FirstChild = nullptr;
139 } // namespace syntax