1 //===-- ClangHighlighter.cpp ------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "ClangHighlighter.h"
12 #include "lldb/Target/Language.h"
13 #include "lldb/Utility/AnsiTerminal.h"
14 #include "lldb/Utility/StreamString.h"
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Lex/Lexer.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/MemoryBuffer.h"
21 using namespace lldb_private;
/// Tells whether the given text is one of the keywords this highlighter knows.
/// \param token
/// The token text to look up.
/// \return
/// True if looking up \p token in `keywords` finds an entry.
23 bool ClangHighlighter::isKeyword(llvm::StringRef token) const {
24 return keywords.find(token) != keywords.end();
/// Populates `keywords` from Clang's token definition file.
///
/// KEYWORD(X, N) is defined to insert the stringified name X into `keywords`
/// (the second argument N is ignored), so every KEYWORD entry that is
/// expanded while including TokenKinds.def contributes one spelling.
// NOTE(review): KEYWORD is not #undef'd in the visible lines; presumably
// TokenKinds.def cleans up the macros it consumes -- confirm.
27 ClangHighlighter::ClangHighlighter() {
28 #define KEYWORD(X, N) keywords.insert(#X);
29 #include "clang/Basic/TokenKinds.def"
32 /// Determines which style should be applied to the given token.
33 /// \param highlighter
34 /// The current highlighter that should use the style.
/// \param token
36 /// The current token.
/// \param tok_str
38 /// The string in the source code the token represents.
/// \param options
40 /// The style we use for coloring the source code.
41 /// \param in_pp_directive
42 /// If we are currently in a preprocessor directive. NOTE: This is
43 /// passed by reference and will be updated if the current token starts
44 /// or ends a preprocessor directive.
/// \return
46 /// The ColorStyle that should be applied to the token.
47 static HighlightStyle::ColorStyle
48 determineClangStyle(const ClangHighlighter &highlighter,
49 const clang::Token &token, llvm::StringRef tok_str,
50 const HighlightStyle &options, bool &in_pp_directive) {
51 using namespace clang;
// The checks below are ordered; the first category that matches decides the
// style. Comments come first so that they also terminate a directive.
53 if (token.is(tok::comment)) {
54 // If we were in a preprocessor directive before, we now left it.
55 in_pp_directive = false;
56 return options.comment;
57 } else if (in_pp_directive || token.getKind() == tok::hash) {
// Once the flag is set, every following non-comment token takes this branch;
// within this function only the comment branch above clears it again.
58 // Let's assume that the rest of the line is a PP directive.
59 in_pp_directive = true;
60 // Preprocessor directives are hard to match, so we have to hack this in.
61 return options.pp_directive;
// String literals are checked first; any other token for which
// tok::isLiteral holds gets the scalar literal style.
62 } else if (tok::isStringLiteral(token.getKind()))
63 return options.string_literal;
64 else if (tok::isLiteral(token.getKind()))
65 return options.scalar_literal;
// Keywords are recognized by their spelling (tok_str), not by token kind.
66 else if (highlighter.isKeyword(tok_str))
67 return options.keyword;
// Everything else is styled by token kind.
// NOTE(review): parts of this switch (a number of `case` labels and its
// closing brace) do not appear in this listing -- confirm the exact label
// groups against the complete file before relying on them.
69 switch (token.getKind()) {
70 case tok::raw_identifier:
72 return options.identifier;
75 return options.braces;
78 return options.square_brackets;
81 return options.parentheses;
102 case tok::exclaimequal:
104 case tok::slashequal:
106 case tok::percentequal:
110 case tok::lesslessequal:
113 case tok::greatergreater:
114 case tok::greaterequal:
115 case tok::greatergreaterequal:
117 case tok::caretequal:
123 case tok::equalequal:
124 return options.operators;
// No category matched: hand back a default-constructed style.
128 return HighlightStyle::ColorStyle();
/// Highlights one line of source code by lexing it with Clang's raw lexer.
///
/// The text of \p previous_lines followed by \p line is placed in a memory
/// buffer and lexed with whitespace kept. Token strings are cut out of
/// \p line by column and written to \p result through the ColorStyle that
/// determineClangStyle selects for them.
/// \param options
/// The styles to use; `options.selected` is applied to the token at the
/// cursor before the token's own color is applied.
/// \param line
/// The line that should be highlighted.
/// \param cursor_pos
/// Optional cursor position. If set, the first token whose end offset
/// (start + length) is greater than this value gets the selected style.
/// \param previous_lines
/// Text that precedes \p line. Its newline count determines the line
/// number \p line has inside the combined buffer.
/// \param result
/// The stream that receives the highlighted tokens.
131 void ClangHighlighter::Highlight(const HighlightStyle &options,
132 llvm::StringRef line,
133 llvm::Optional<size_t> cursor_pos,
134 llvm::StringRef previous_lines,
135 Stream &result) const {
136 using namespace clang;
138 FileSystemOptions file_opts;
139 FileManager file_mgr(file_opts);
// 1-based line number of `line` inside the combined source built below.
141 unsigned line_number = previous_lines.count('\n') + 1U;
143 // Let's build the actual source code Clang needs and setup some utility
// objects (diagnostics, source manager, memory buffer) for lexing it.
145 std::string full_source = previous_lines.str() + line.str();
146 llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs());
147 llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts(
148 new DiagnosticOptions());
149 DiagnosticsEngine diags(diag_ids, diags_opts);
150 clang::SourceManager SM(diags, file_mgr);
151 auto buf = llvm::MemoryBuffer::getMemBuffer(full_source);
153 FileID FID = SM.createFileID(clang::SourceManager::Unowned, buf.get());
155 // Let's just enable the latest ObjC and C++ which should get most tokens
// lexed correctly.
// NOTE(review): the declaration of `Opts` and any ObjC setting are not
// visible in this listing; only the C++17 and line-comment options are.
159 // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too
160 Opts.CPlusPlus17 = true;
161 Opts.LineComment = true;
163 Lexer lex(FID, buf.get(), SM, Opts);
164 // The lexer should keep whitespace around.
165 lex.SetKeepWhitespaceMode(true);
167 // Keeps track if we have entered a PP directive.
168 bool in_pp_directive = false;
170 // True once we actually lexed the user provided line.
171 bool found_user_line = false;
173 // True if we already highlighted the token under the cursor, false otherwise.
174 bool highlighted_cursor = false;
// NOTE(review): the declarations of `token` and `exit` and the header of the
// loop that drives the lexer are not visible in this listing.
178 // Returns true if this is the last token we get from the lexer.
179 exit = lex.LexFromRawLexer(token);
181 bool invalid = false;
182 unsigned current_line_number =
183 SM.getSpellingLineNumber(token.getLocation(), &invalid);
// NOTE(review): the statement guarded by this `if` is missing from the
// listing -- presumably it skips tokens that are not on the user line, so
// that `found_user_line` is only set for a token on that line. Confirm.
184 if (current_line_number != line_number)
186 found_user_line = true;
188 // We don't need to print any tokens without a spelling line number.
192 // Same as above but with the column number.
194 unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid);
197 // Column numbers start at 1, but indexes in our string start at 0.
200 // Annotations don't have a length, so let's skip them.
201 if (token.isAnnotation())
204 // Extract the token string from our source code.
205 llvm::StringRef tok_str = line.substr(start, token.getLength());
207 // If the token is just an empty string, we can skip all the work below.
211 // If the cursor is inside this token, we have to apply the 'selected'
212 // highlight style before applying the actual token color.
213 llvm::StringRef to_print = tok_str;
214 StreamString storage;
215 auto end = start + token.getLength();
// Only the first token that extends past the cursor is marked as selected;
// `highlighted_cursor` blocks all later tokens from matching.
216 if (cursor_pos && end > *cursor_pos && !highlighted_cursor) {
217 highlighted_cursor = true;
218 options.selected.Apply(storage, tok_str);
219 to_print = storage.GetString();
222 // See how we are supposed to highlight this token.
223 HighlightStyle::ColorStyle color =
224 determineClangStyle(*this, token, tok_str, options, in_pp_directive);
// The token color wraps `to_print`, which already carries the selected style
// when this token is the one under the cursor.
226 color.Apply(result, to_print);
229 // If we went over the whole file but couldn't find our own line, then
230 // somehow our setup was wrong. When we're in release mode we just give the
231 // user the normal line and pretend we don't know how to highlight it. In
232 // debug mode we bail out with an assert as this should never happen.
// NOTE(review): the statement that writes the plain line to `result` is not
// visible in this listing.
233 if (!found_user_line) {
235 assert(false && "We couldn't find the user line in the input file?");