1 //===-- clang-format/ClangFormat.cpp - Clang format tool ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements a clang-format tool that automatically formats
11 /// (fragments of) C++ code.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Basic/Diagnostic.h"
16 #include "clang/Basic/DiagnosticOptions.h"
17 #include "clang/Basic/FileManager.h"
18 #include "clang/Basic/SourceManager.h"
19 #include "clang/Basic/Version.h"
20 #include "clang/Format/Format.h"
21 #include "clang/Rewrite/Core/Rewriter.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/FileSystem.h"
24 #include "llvm/Support/InitLLVM.h"
25 #include "llvm/Support/Process.h"
28 using clang::tooling::Replacements;
// Boolean flag "-h", registered as a hidden option; its description marks it
// as an alias for -help.
30 static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
32 // Mark all our options with this category, everything else (except for -version
33 // and -help) will be hidden.
// main() passes this category to cl::HideUnrelatedOptions().
34 static cl::OptionCategory ClangFormatCategory("Clang-format options");
// List option holding unsigned byte offsets; per its description each value is
// the start of a range to format.
// NOTE(review): presumably this is the `Offsets` list that fillRanges() and
// main() read (flag -offset) - confirm against the declaration's name line.
36 static cl::list<unsigned>
38 cl::desc("Format a range starting at this byte offset.\n"
39 "Multiple ranges can be formatted by specifying\n"
40 "several -offset and -length pairs.\n"
41 "Can only be used with one input file."),
42 cl::cat(ClangFormatCategory));
// List option holding unsigned range lengths in bytes; per its description the
// values pair up with the byte offsets given on the command line.
// NOTE(review): presumably this is the `Lengths` list that fillRanges() and
// main() read (flag -length) - confirm against the declaration's name line.
// NOTE(review): the help sentence ending in "up to the end" reads as
// truncated - confirm the full wording.
43 static cl::list<unsigned>
45 cl::desc("Format a range of this length (in bytes).\n"
46 "Multiple ranges can be formatted by specifying\n"
47 "several -offset and -length pairs.\n"
48 "When only a single -offset is specified without\n"
49 "-length, clang-format will format up to the end\n"
51 "Can only be used with one input file."),
52 cl::cat(ClangFormatCategory));
// String list option "-lines". Each entry is a "<start line>:<end line>" pair
// that fillRanges() hands to parseLineRange(); fillRanges() reports an error
// when it is combined with offsets or lengths.
53 static cl::list<std::string>
54 LineRanges("lines", cl::desc("<start line>:<end line> - format a range of\n"
55 "lines (both 1-based).\n"
56 "Multiple ranges can be formatted by specifying\n"
57 "several -lines arguments.\n"
58 "Can't be used with -offset and -length.\n"
59 "Can only be used with one input file."),
60 cl::cat(ClangFormatCategory));
// String option "-style", defaulting to clang::format::DefaultFormatStyle.
// Its value is passed as the first argument to getStyle() in format() and in
// main().
61 static cl::opt<std::string>
62 Style("style", cl::desc(clang::format::StyleOptionHelpDescription),
63 cl::init(clang::format::DefaultFormatStyle),
64 cl::cat(ClangFormatCategory));
// String option "-fallback-style", defaulting to
// clang::format::DefaultFallbackStyle. Its value is passed as the third
// argument to getStyle() in format() and in main().
// NOTE(review): the help sentence ending in "the .clang-format" reads as
// truncated - confirm the full wording.
65 static cl::opt<std::string>
66 FallbackStyle("fallback-style",
67 cl::desc("The name of the predefined style used as a\n"
68 "fallback in case clang-format is invoked with\n"
69 "-style=file, but can not find the .clang-format\n"
71 "Use -fallback-style=none to skip formatting."),
72 cl::init(clang::format::DefaultFallbackStyle),
73 cl::cat(ClangFormatCategory));
// String option "-assume-filename", defaulting to "<stdin>". format() and
// main() substitute its value for the file name "-", and main() also uses it
// when no input files were given.
75 static cl::opt<std::string>
76 AssumeFileName("assume-filename",
77 cl::desc("When reading from stdin, clang-format assumes this\n"
78 "filename to look for a style config file (with\n"
79 "-style=file) and to determine the language."),
80 cl::init("<stdin>"), cl::cat(ClangFormatCategory));
// Boolean flag "-i". In format() it is only honoured when OutputXML is not
// set: it is rejected for the input "-" and it selects
// MemoryBuffer::getFileAsStream() for reading the file.
82 static cl::opt<bool> Inplace("i",
83 cl::desc("Inplace edit <file>s, if specified."),
84 cl::cat(ClangFormatCategory));
// Boolean flag "-output-replacements-xml". When set, the -i specific handling
// in format() (stdin rejection, stream-based read) is skipped.
86 static cl::opt<bool> OutputXML("output-replacements-xml",
87 cl::desc("Output replacements as XML."),
88 cl::cat(ClangFormatCategory));
// Option "-dump-config"; per its description the effective configuration is
// written to stdout and the tool exits.
// NOTE(review): presumably a cl::opt<bool> like the neighbouring flags -
// confirm the declared type.
90 DumpConfig("dump-config",
91 cl::desc("Dump configuration options to stdout and exit.\n"
92 "Can be used with -style option."),
93 cl::cat(ClangFormatCategory));
// Unsigned option defaulting to 0; per its description it carries the cursor
// position supplied by an editor integration.
// NOTE(review): presumably this is the `Cursor` option that format() copies
// into CursorPosition - confirm against the declaration's name line.
94 static cl::opt<unsigned>
96 cl::desc("The position of the cursor when invoking\n"
97 "clang-format from an editor integration"),
98 cl::init(0), cl::cat(ClangFormatCategory));
// Boolean option overriding the style's SortIncludes setting. format() applies
// it only when the option actually occurred on the command line
// (getNumOccurrences() != 0), so the style's own value is kept otherwise.
100 static cl::opt<bool> SortIncludes(
102 cl::desc("If set, overrides the include sorting behavior determined by the "
103 "SortIncludes style flag"),
104 cl::cat(ClangFormatCategory));
// Option "-verbose"; per its description it makes the tool list the files it
// processes.
// NOTE(review): presumably a cl::opt<bool> like the neighbouring flags -
// confirm the declared type.
107 Verbose("verbose", cl::desc("If set, shows the list of processed files"),
108 cl::cat(ClangFormatCategory));
// Positional list of input file names. main() formats each entry in turn and
// formats "-" instead when the list is empty.
110 static cl::list<std::string> FileNames(cl::Positional, cl::desc("[<file> ...]"),
111 cl::cat(ClangFormatCategory));
// Makes the buffer `Source` available under `FileName` in the in-memory file
// system `MemFS`, then creates and returns a FileID in `Sources` for the file
// entry that `Files` resolves for that name.
116 static FileID createInMemoryFile(StringRef FileName, MemoryBuffer *Source,
117 SourceManager &Sources, FileManager &Files,
118 llvm::vfs::InMemoryFileSystem *MemFS) {
// Register the buffer under FileName. The literal 0 is presumably the
// modification time and "NoOwn" presumably means MemFS does not take ownership
// of Source - confirm against InMemoryFileSystem::addFileNoOwn.
119 MemFS->addFileNoOwn(FileName, 0, Source);
// A default-constructed SourceLocation is passed as the second argument.
120 return Sources.createFileID(Files.getFile(FileName), SourceLocation(),
124 // Parses <start line>:<end line> input to a pair of line numbers.
125 // Returns true on error.
// The text before the ':' separator is stored in FromLine and the text after
// it in ToLine. Both halves are converted with StringRef::getAsInteger using
// radix 0, i.e. the radix is auto-detected from the digits' prefix.
126 static bool parseLineRange(StringRef Input, unsigned &FromLine,
128 std::pair<StringRef, StringRef> LineRange = Input.split(':');
// `||` short-circuits: the second half is not parsed (and ToLine is left
// untouched) when the first half fails to parse.
129 return LineRange.first.getAsInteger(0, FromLine) ||
130 LineRange.second.getAsInteger(0, ToLine);
// Translates the range-selecting command-line options into tooling::Range
// entries (byte offset + length within `Code`) appended to `Ranges`.
// Line ranges (LineRanges) and offset/length pairs (Offsets/Lengths) are
// handled separately; combining them is reported as an error.
// NOTE(review): presumably returns true on error, like the other helpers in
// this file - confirm against the return statements.
133 static bool fillRanges(MemoryBuffer *Code,
134 std::vector<tooling::Range> &Ranges) {
// Build a SourceManager over an in-memory file system holding `Code`, so that
// line/column positions and file offsets can be converted into each other.
135 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
136 new llvm::vfs::InMemoryFileSystem);
137 FileManager Files(FileSystemOptions(), InMemoryFileSystem);
138 DiagnosticsEngine Diagnostics(
139 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
140 new DiagnosticOptions);
141 SourceManager Sources(Diagnostics, Files);
// The name under which the buffer is registered is a placeholder.
142 FileID ID = createInMemoryFile("<irrelevant>", Code, Sources, Files,
143 InMemoryFileSystem.get());
// Line-based ranges.
144 if (!LineRanges.empty()) {
145 if (!Offsets.empty() || !Lengths.empty()) {
146 errs() << "error: cannot use -lines with -offset/-length\n";
150 for (unsigned i = 0, e = LineRanges.size(); i < e; ++i) {
151 unsigned FromLine, ToLine;
152 if (parseLineRange(LineRanges[i], FromLine, ToLine)) {
153 errs() << "error: invalid <start line>:<end line> pair\n";
// NOTE(review): FromLine == ToLine passes this check, although the message
// says "less than" - confirm the intended wording.
156 if (FromLine > ToLine) {
157 errs() << "error: start line should be less than end line\n";
// The range runs from column 1 of FromLine to column UINT_MAX of ToLine
// (presumably resolved to the end of that line - confirm against
// SourceManager::translateLineCol).
160 SourceLocation Start = Sources.translateLineCol(ID, FromLine, 1);
161 SourceLocation End = Sources.translateLineCol(ID, ToLine, UINT_MAX);
162 if (Start.isInvalid() || End.isInvalid())
164 unsigned Offset = Sources.getFileOffset(Start);
165 unsigned Length = Sources.getFileOffset(End) - Offset;
166 Ranges.push_back(tooling::Range(Offset, Length));
// Offset/length based ranges.
// An offset of 0 is appended here (NOTE(review): presumably only when no
// offset was given on the command line - confirm the guarding condition).
172 Offsets.push_back(0);
// The counts must match, except that a single offset without any length is
// accepted.
173 if (Offsets.size() != Lengths.size() &&
174 !(Offsets.size() == 1 && Lengths.empty())) {
175 errs() << "error: number of -offset and -length arguments must match.\n";
178 for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
// An offset equal to the buffer size is already rejected.
179 if (Offsets[i] >= Code->getBufferSize()) {
180 errs() << "error: offset " << Offsets[i] << " is outside the file\n";
183 SourceLocation Start =
184 Sources.getLocForStartOfFile(ID).getLocWithOffset(Offsets[i]);
// With a matching length the range ends at Start + length; a range may end
// exactly at the end of the buffer.
186 if (i < Lengths.size()) {
// NOTE(review): both operands come from lists of `unsigned`, so this sum could
// wrap around for very large values and slip past the check - confirm whether
// that needs guarding.
187 if (Offsets[i] + Lengths[i] > Code->getBufferSize()) {
188 errs() << "error: invalid length " << Lengths[i]
189 << ", offset + length (" << Offsets[i] + Lengths[i]
190 << ") is outside the file.\n";
193 End = Start.getLocWithOffset(Lengths[i]);
// Without a matching length the range extends to the end of the file.
195 End = Sources.getLocForEndOfFile(ID);
197 unsigned Offset = Sources.getFileOffset(Start);
198 unsigned Length = Sources.getFileOffset(End) - Offset;
199 Ranges.push_back(tooling::Range(Offset, Length));
// Writes `Text` to outs(). The characters '\n', '\r', '<' and '&' are located
// one at a time and handled individually by the switch below; the stretches of
// text between them are written through unchanged.
204 static void outputReplacementXML(StringRef Text) {
205 // FIXME: When we sort includes, we need to make sure the stream is correct
// Find the next special character at or after position From.
209 while ((Index = Text.find_first_of("\n\r<&", From)) != StringRef::npos) {
// Emit the plain text preceding the special character.
210 outs() << Text.substr(From, Index - From);
211 switch (Text[Index]) {
// find_first_of only reports the four characters searched for, so any other
// value here is treated as impossible.
225 llvm_unreachable("Unexpected character encountered!");
// Emit whatever follows the last special character.
229 outs() << Text.substr(From);
// Writes every replacement in `Replaces` to outs() as one
// "<replacement offset='N' length='M'>text</replacement>" line, with the
// replacement text emitted through outputReplacementXML().
232 static void outputReplacementsXML(const Replacements &Replaces) {
233 for (const auto &R : Replaces) {
234 outs() << "<replacement "
235 << "offset='" << R.getOffset() << "' "
236 << "length='" << R.getLength() << "'>";
237 outputReplacementXML(R.getReplacementText());
238 outs() << "</replacement>\n";
// Formats a single input. `FileName` is a path, or "-" for standard input.
// The stages are: read the input, reject unsupported byte order marks,
// compute the ranges to format, resolve the style, sort includes, reformat,
// and finally emit or apply the merged replacements.
242 // Returns true on error.
243 static bool format(StringRef FileName) {
// In-place editing needs a real file to write back to; the restriction is not
// applied when XML output was requested.
244 if (!OutputXML && Inplace && FileName == "-") {
245 errs() << "error: cannot use -i when reading from stdin.\n";
248 // On Windows, overwriting a file with an open file mapping doesn't work,
249 // so read the whole file into memory when formatting in-place.
250 ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
251 !OutputXML && Inplace ? MemoryBuffer::getFileAsStream(FileName) :
252 MemoryBuffer::getFileOrSTDIN(FileName);
// Report a failed read using the system error message.
253 if (std::error_code EC = CodeOrErr.getError()) {
254 errs() << EC.message() << "\n";
257 std::unique_ptr<llvm::MemoryBuffer> Code = std::move(CodeOrErr.get());
258 if (Code->getBufferSize() == 0)
259 return false; // Empty files are formatted correctly.
261 // Check to see if the buffer has a UTF Byte Order Mark (BOM).
262 // We only support UTF-8 with and without a BOM right now. See
263 // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding
264 // for more information.
265 StringRef BufStr = Code->getBuffer();
// Each case maps a leading byte sequence to the name of an unsupported
// encoding. withInnerNUL is used for the patterns containing NUL bytes. The
// four-byte pattern beginning with "\xFF\xFE" is listed before the two-byte
// "\xFF\xFE" case (presumably because the first matching case determines the
// result - confirm against llvm::StringSwitch).
266 const char *InvalidBOM = llvm::StringSwitch<const char *>(BufStr)
267 .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
269 .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
271 .StartsWith("\xFE\xFF", "UTF-16 (BE)")
272 .StartsWith("\xFF\xFE", "UTF-16 (LE)")
273 .StartsWith("\x2B\x2F\x76", "UTF-7")
274 .StartsWith("\xF7\x64\x4C", "UTF-1")
275 .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
276 .StartsWith("\x0E\xFE\xFF", "SCSU")
277 .StartsWith("\xFB\xEE\x28", "BOCU-1")
278 .StartsWith("\x84\x31\x95\x33", "GB-18030")
// Diagnostic naming the detected encoding and the offending file.
282 errs() << "error: encoding with unsupported byte order mark \""
283 << InvalidBOM << "\" detected";
285 errs() << " in file '" << FileName << "'";
// Turn the range-selecting command-line options into byte ranges within Code.
290 std::vector<tooling::Range> Ranges;
291 if (fillRanges(Code.get(), Ranges))
// For stdin the user-supplied (or default) assumed file name is used for
// style lookup and for the formatting calls below.
293 StringRef AssumedFileName = (FileName == "-") ? AssumeFileName : FileName;
295 llvm::Expected<FormatStyle> FormatStyle =
296 getStyle(Style, AssumedFileName, FallbackStyle, Code->getBuffer());
// Style resolution failure: print the error text.
298 llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n";
// Only an explicitly given option overrides the style's own SortIncludes.
302 if (SortIncludes.getNumOccurrences() != 0)
303 FormatStyle->SortIncludes = SortIncludes;
// CursorPosition is handed to sortIncludes() by pointer so that it can be
// adjusted when includes are reordered.
304 unsigned CursorPosition = Cursor;
305 Replacements Replaces = sortIncludes(*FormatStyle, Code->getBuffer(), Ranges,
306 AssumedFileName, &CursorPosition);
// Apply the include-sorting replacements to obtain the text that is
// reformatted next.
307 auto ChangedCode = tooling::applyAllReplacements(Code->getBuffer(), Replaces);
309 llvm::errs() << llvm::toString(ChangedCode.takeError()) << "\n";
312 // Get new affected ranges after sorting `#includes`.
313 Ranges = tooling::calculateRangesAfterReplacements(Replaces, Ranges);
314 FormattingAttemptStatus Status;
315 Replacements FormatChanges = reformat(*FormatStyle, *ChangedCode, Ranges,
316 AssumedFileName, &Status);
// Combine include sorting and reformatting into one set of replacements.
317 Replaces = Replaces.merge(FormatChanges);
// XML output: the incomplete_format attribute is the negation of
// Status.FormatComplete; the line attribute is added only for an incomplete
// format.
319 outs() << "<?xml version='1.0'?>\n<replacements "
320 "xml:space='preserve' incomplete_format='"
321 << (Status.FormatComplete ? "false" : "true") << "'";
322 if (!Status.FormatComplete)
323 outs() << " line='" << Status.Line << "'";
325 if (Cursor.getNumOccurrences() != 0)
327 << FormatChanges.getShiftedCodePosition(CursorPosition)
330 outputReplacementsXML(Replaces);
331 outs() << "</replacements>\n";
// Non-XML output: register Code in an in-memory file system under
// AssumedFileName and apply all replacements through a Rewriter.
333 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
334 new llvm::vfs::InMemoryFileSystem);
335 FileManager Files(FileSystemOptions(), InMemoryFileSystem);
336 DiagnosticsEngine Diagnostics(
337 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
338 new DiagnosticOptions);
339 SourceManager Sources(Diagnostics, Files);
340 FileID ID = createInMemoryFile(AssumedFileName, Code.get(), Sources, Files,
341 InMemoryFileSystem.get());
342 Rewriter Rewrite(Sources, LangOptions());
343 tooling::applyAllReplacements(Replaces, Rewrite);
// Write the rewritten buffer back to its file (presumably a true result
// signals failure - confirm against Rewriter::overwriteChangedFiles).
345 if (Rewrite.overwriteChangedFiles())
// When a cursor position was given, a JSON object with the shifted cursor
// position and the completeness status is written first.
348 if (Cursor.getNumOccurrences() != 0) {
349 outs() << "{ \"Cursor\": "
350 << FormatChanges.getShiftedCodePosition(CursorPosition)
351 << ", \"IncompleteFormat\": "
352 << (Status.FormatComplete ? "false" : "true");
353 if (!Status.FormatComplete)
354 outs() << ", \"Line\": " << Status.Line;
// Emit the rewritten text of the input to stdout.
357 Rewrite.getEditBuffer(ID).write(outs());
363 } // namespace format
// Version printer installed by main() via cl::SetVersionPrinter(): writes the
// full tool version string for "clang-format", followed by a newline, to OS.
366 static void PrintVersion(raw_ostream &OS) {
367 OS << clang::getClangToolFullVersion("clang-format") << '\n';
// Entry point: parses the command line, then prints help, dumps the effective
// configuration, or formats the inputs. When formatting, the exit code is 1 if
// any input reported an error and 0 otherwise.
370 int main(int argc, const char **argv) {
371 llvm::InitLLVM X(argc, argv);
// Hide every option that does not belong to the clang-format category.
373 cl::HideUnrelatedOptions(ClangFormatCategory);
375 cl::SetVersionPrinter(PrintVersion);
// The string literal below is the overview text passed to the parser.
376 cl::ParseCommandLineOptions(
378 "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.\n\n"
379 "If no arguments are specified, it formats the code from standard input\n"
380 "and writes the result to the standard output.\n"
381 "If <file>s are given, it reformats the files. If -i is specified\n"
382 "together with <file>s, the files are edited in-place. Otherwise, the\n"
383 "result is written to the standard output.\n");
386 cl::PrintHelpMessage();
// Configuration dump: determine the file name (and, if a file was given, its
// contents) that getStyle() uses to resolve the style.
392 std::unique_ptr<llvm::MemoryBuffer> Code;
393 if (FileNames.empty()) {
394 // We can't read the code to detect the language if there's no
395 // file name, so leave Code empty here.
396 FileName = AssumeFileName;
398 // Read in the code in case the filename alone isn't enough to
399 // detect the language.
400 ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
401 MemoryBuffer::getFileOrSTDIN(FileNames[0]);
402 if (std::error_code EC = CodeOrErr.getError()) {
403 llvm::errs() << EC.message() << "\n";
// Only the first file name is consulted; "-" is replaced by the assumed name.
406 FileName = (FileNames[0] == "-") ? AssumeFileName : FileNames[0];
407 Code = std::move(CodeOrErr.get());
// An empty string stands in for the code when no buffer was read.
409 llvm::Expected<clang::format::FormatStyle> FormatStyle =
410 clang::format::getStyle(Style, FileName, FallbackStyle,
411 Code ? Code->getBuffer() : "");
413 llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n";
// Print the resolved style as configuration text.
416 std::string Config = clang::format::configurationAsText(*FormatStyle);
417 outs() << Config << "\n";
// No input files: format standard input.
422 if (FileNames.empty()) {
423 Error = clang::format::format("-");
424 return Error ? 1 : 0;
// Range-selecting options are rejected unless exactly one file was given.
426 if (FileNames.size() != 1 && (!Offsets.empty() || !Lengths.empty() || !LineRanges.empty())) {
427 errs() << "error: -offset, -length and -lines can only be used for "
// Format every file; errors are accumulated so that one failing file does not
// stop the remaining ones from being processed.
431 for (const auto &FileName : FileNames) {
433 errs() << "Formatting " << FileName << "\n";
434 Error |= clang::format::format(FileName);
436 return Error ? 1 : 0;