1 //===-- clang-format/ClangFormat.cpp - Clang format tool ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements a clang-format tool that automatically formats
11 /// (fragments of) C++ code.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Basic/Diagnostic.h"
16 #include "clang/Basic/DiagnosticOptions.h"
17 #include "clang/Basic/FileManager.h"
18 #include "clang/Basic/SourceManager.h"
19 #include "clang/Basic/Version.h"
20 #include "clang/Format/Format.h"
21 #include "clang/Frontend/TextDiagnosticPrinter.h"
22 #include "clang/Rewrite/Core/Rewriter.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/InitLLVM.h"
26 #include "llvm/Support/Process.h"
29 using clang::tooling::Replacements;
// Hidden boolean flag "h"; its description marks it as an alias for -help.
31 static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
33 // Mark all our options with this category, everything else (except for -version
34 // and -help) will be hidden.
// Every tool option in this file passes cl::cat(ClangFormatCategory); main()
// hands the same category to cl::HideUnrelatedOptions.
35 static cl::OptionCategory ClangFormatCategory("Clang-format options");
// Range-selection options. Three list options are declared here: two lists of
// unsigned values (described as -offset and -length) and one list of strings
// (described as -lines).
// NOTE(review): in each declaration the line carrying the variable name and
// flag string is not present in this listing (the type is followed directly
// by cl::desc). fillRanges() and main() refer to them as Offsets, Lengths and
// LineRanges.

// Byte offsets at which ranges to format start.
37 static cl::list<unsigned>
39 cl::desc("Format a range starting at this byte offset.\n"
40 "Multiple ranges can be formatted by specifying\n"
41 "several -offset and -length pairs.\n"
42 "Can only be used with one input file."),
43 cl::cat(ClangFormatCategory));
// Byte lengths matching the offsets above.
// NOTE(review): one line of this help text (after "format up to the end") is
// also absent from the listing.
44 static cl::list<unsigned>
46 cl::desc("Format a range of this length (in bytes).\n"
47 "Multiple ranges can be formatted by specifying\n"
48 "several -offset and -length pairs.\n"
49 "When only a single -offset is specified without\n"
50 "-length, clang-format will format up to the end\n"
52 "Can only be used with one input file."),
53 cl::cat(ClangFormatCategory));
// Line ranges given as "<start line>:<end line>" strings; they are parsed by
// parseLineRange() from within fillRanges().
54 static cl::list<std::string>
56 cl::desc("<start line>:<end line> - format a range of\n"
57 "lines (both 1-based).\n"
58 "Multiple ranges can be formatted by specifying\n"
59 "several -lines arguments.\n"
60 "Can't be used with -offset and -length.\n"
61 "Can only be used with one input file."),
62 cl::cat(ClangFormatCategory));
// Style-selection options. All three are string options that are forwarded to
// getStyle() by format() and dumpConfig().

// -style: help text and default value both come from clang::format constants.
63 static cl::opt<std::string>
64 Style("style", cl::desc(clang::format::StyleOptionHelpDescription),
65 cl::init(clang::format::DefaultFormatStyle),
66 cl::cat(ClangFormatCategory));
// -fallback-style: used when -style=file finds no configuration file.
// NOTE(review): one line of the help text (after "the .clang-format") is not
// present in this listing.
67 static cl::opt<std::string>
68 FallbackStyle("fallback-style",
69 cl::desc("The name of the predefined style used as a\n"
70 "fallback in case clang-format is invoked with\n"
71 "-style=file, but can not find the .clang-format\n"
73 "Use -fallback-style=none to skip formatting."),
74 cl::init(clang::format::DefaultFallbackStyle),
75 cl::cat(ClangFormatCategory));
// File name to assume when the input is stdin; defaults to "<stdin>".
// NOTE(review): the line holding the flag string is not present in this
// listing.
77 static cl::opt<std::string> AssumeFileName(
79 cl::desc("When reading from stdin, clang-format assumes this\n"
80 "filename to look for a style config file (with\n"
81 "-style=file) and to determine the language."),
82 cl::init("<stdin>"), cl::cat(ClangFormatCategory));
// Output and behaviour options.
// NOTE(review): several declarations below have lost a line in this listing
// (the "static cl::opt<bool>" line before DumpConfig and Verbose, and the
// name/flag-string line of the unsigned cursor option and of SortIncludes).

// -i: edit the given files in place.
84 static cl::opt<bool> Inplace("i",
85 cl::desc("Inplace edit <file>s, if specified."),
86 cl::cat(ClangFormatCategory));
// -output-replacements-xml: emit the computed replacements as XML.
88 static cl::opt<bool> OutputXML("output-replacements-xml",
89 cl::desc("Output replacements as XML."),
90 cl::cat(ClangFormatCategory));
// -dump-config: print the effective configuration and exit.
92 DumpConfig("dump-config",
93 cl::desc("Dump configuration options to stdout and exit.\n"
94 "Can be used with -style option."),
95 cl::cat(ClangFormatCategory));
// Cursor position supplied by an editor integration; defaults to 0. format()
// reads this option through the name Cursor.
96 static cl::opt<unsigned>
98 cl::desc("The position of the cursor when invoking\n"
99 "clang-format from an editor integration"),
100 cl::init(0), cl::cat(ClangFormatCategory));
// Command-line override for the style's SortIncludes flag; format() applies
// it only when the option actually occurred on the command line.
102 static cl::opt<bool> SortIncludes(
104 cl::desc("If set, overrides the include sorting behavior determined by the "
105 "SortIncludes style flag"),
106 cl::cat(ClangFormatCategory));
// -verbose: list the files as they are processed.
109 Verbose("verbose", cl::desc("If set, shows the list of processed files"),
110 cl::cat(ClangFormatCategory));
// Dry-run and diagnostic options, followed by the positional file list.
// NOTE(review): several declarations below are missing their first line(s) in
// this listing (type and/or variable name), and the DryRunShort declaration
// is missing its last line.

112 // Use --dry-run to match other LLVM tools when you mean do it but don't
// (The sentence above is cut off in this listing; per the description below,
// the flag means the formatting changes are not actually made.)
// The alias below and format() refer to this option as DryRun.
116 cl::desc("If set, do not actually make the formatting changes"),
117 cl::cat(ClangFormatCategory));
119 // Use -n as a common command as an alias for --dry-run. (git and make use -n)
120 static cl::alias DryRunShort("n", cl::desc("Alias for --dry-run"),
121 cl::cat(ClangFormatCategory), cl::aliasopt(DryRun),
124 // Emulate being able to turn on/off the warning.
// A pair of hidden flags: WarnFormat defaults to true, NoWarnFormat to false.
// emitReplacementWarnings() reports only when WarnFormat && !NoWarnFormat.
126 WarnFormat("Wclang-format-violations",
127 cl::desc("Warnings about individual formatting changes needed. "
128 "Used only with --dry-run or -n"),
129 cl::init(true), cl::cat(ClangFormatCategory), cl::Hidden);
132 NoWarnFormat("Wno-clang-format-violations",
133 cl::desc("Do not warn about individual formatting changes "
134 "needed. Used only with --dry-run or -n"),
135 cl::init(false), cl::cat(ClangFormatCategory), cl::Hidden);
// Maximum number of diagnostics to emit; the default 0 means no limit.
137 static cl::opt<unsigned> ErrorLimit(
139 cl::desc("Set the maximum number of clang-format errors to emit before "
140 "stopping (0 = no limit). Used only with --dry-run or -n"),
141 cl::init(0), cl::cat(ClangFormatCategory));
// -Werror: selects Error instead of Warning severity in
// emitReplacementWarnings().
144 WarningsAsErrors("Werror",
145 cl::desc("If set, changes formatting warnings to errors"),
146 cl::cat(ClangFormatCategory));
// A pair of hidden colour flags (ShowColors defaults to true, NoShowColors to
// false); colours are enabled when ShowColors && !NoShowColors. Both carry
// the same description text.
149 ShowColors("fcolor-diagnostics",
150 cl::desc("If set, and on a color-capable terminal controls "
151 "whether or not to print diagnostics in color"),
152 cl::init(true), cl::cat(ClangFormatCategory), cl::Hidden);
155 NoShowColors("fno-color-diagnostics",
156 cl::desc("If set, and on a color-capable terminal controls "
157 "whether or not to print diagnostics in color"),
158 cl::init(false), cl::cat(ClangFormatCategory), cl::Hidden);
// Positional arguments: the files to format. main() formats stdin ("-") when
// this list is empty.
160 static cl::list<std::string> FileNames(cl::Positional, cl::desc("[<file> ...]"),
161 cl::cat(ClangFormatCategory));
// Makes the buffer `Source` available to `Sources` as a file named `FileName`:
// the buffer is added to the in-memory file system `MemFS`, looked up again
// through `Files`, and a FileID is created from the lookup result.
// NOTE(review): the final argument line of the createFileID call and the
// closing brace are not present in this listing.
166 static FileID createInMemoryFile(StringRef FileName, MemoryBuffer *Source,
167 SourceManager &Sources, FileManager &Files,
168 llvm::vfs::InMemoryFileSystem *MemFS) {
// The "NoOwn" in the name suggests MemFS does not take ownership of Source,
// so the caller presumably must keep the buffer alive - confirm against the
// InMemoryFileSystem API.
169 MemFS->addFileNoOwn(FileName, 0, Source);
170 auto File = Files.getFile(FileName);
// A failed lookup is not reported here; nullptr is passed on instead.
171 return Sources.createFileID(File ? *File : nullptr, SourceLocation(),
175 // Parses <start line>:<end line> input to a pair of line numbers.
176 // Returns true on error.
// `Input` is split on ':' and each half is converted with getAsInteger using
// radix argument 0; the first result is stored in FromLine.
// NOTE(review): the second parameter line (which should declare ToLine) and
// the closing brace are not present in this listing.
177 static bool parseLineRange(StringRef Input, unsigned &FromLine,
179 std::pair<StringRef, StringRef> LineRange = Input.split(':');
// `||` short-circuits: the second half is only parsed when parsing the first
// half returned false.
180 return LineRange.first.getAsInteger(0, FromLine) ||
181 LineRange.second.getAsInteger(0, ToLine);
// Converts the range options into tooling::Range entries appended to `Ranges`.
// Either the line ranges (LineRanges) or the offset/length lists (Offsets,
// Lengths) are used; mixing them is reported as an error. Error messages go
// to errs().
// NOTE(review): the return statements, several closing braces and a few
// other lines of this function are not present in this listing. The caller
// in format() tests the result in an `if`, so the error paths presumably
// return true (as parseLineRange does) - confirm.
184 static bool fillRanges(MemoryBuffer *Code,
185 std::vector<tooling::Range> &Ranges) {
// Build a throw-away SourceManager over `Code` so that line numbers and
// offsets can be translated to SourceLocations. The file name passed to
// createInMemoryFile is only a placeholder.
186 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
187 new llvm::vfs::InMemoryFileSystem);
188 FileManager Files(FileSystemOptions(), InMemoryFileSystem);
189 DiagnosticsEngine Diagnostics(
190 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
191 new DiagnosticOptions);
192 SourceManager Sources(Diagnostics, Files);
193 FileID ID = createInMemoryFile("<irrelevant>", Code, Sources, Files,
194 InMemoryFileSystem.get());
// --- Line-range mode ---
195 if (!LineRanges.empty()) {
196 if (!Offsets.empty() || !Lengths.empty()) {
197 errs() << "error: cannot use -lines with -offset/-length\n";
201 for (unsigned i = 0, e = LineRanges.size(); i < e; ++i) {
202 unsigned FromLine, ToLine;
203 if (parseLineRange(LineRanges[i], FromLine, ToLine)) {
204 errs() << "error: invalid <start line>:<end line> pair\n";
// Only FromLine > ToLine is rejected, so equal start and end lines pass this
// check even though the message says "less than".
207 if (FromLine > ToLine) {
208 errs() << "error: start line should be less than end line\n";
// The range runs from column 1 of the first line to column UINT_MAX of the
// last line; presumably translateLineCol clamps an over-large column to the
// end of the line - confirm against SourceManager.
211 SourceLocation Start = Sources.translateLineCol(ID, FromLine, 1);
212 SourceLocation End = Sources.translateLineCol(ID, ToLine, UINT_MAX);
213 if (Start.isInvalid() || End.isInvalid())
215 unsigned Offset = Sources.getFileOffset(Start);
216 unsigned Length = Sources.getFileOffset(End) - Offset;
217 Ranges.push_back(tooling::Range(Offset, Length));
// --- Offset/length mode ---
// NOTE(review): the condition guarding the push_back below is not visible in
// this listing; the statement adds an offset of 0 to the list.
223 Offsets.push_back(0);
// The two lists must have the same size, except that exactly one offset with
// no length at all is accepted.
224 if (Offsets.size() != Lengths.size() &&
225 !(Offsets.size() == 1 && Lengths.empty())) {
226 errs() << "error: number of -offset and -length arguments must match.\n";
229 for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
230 if (Offsets[i] >= Code->getBufferSize()) {
231 errs() << "error: offset " << Offsets[i] << " is outside the file\n";
234 SourceLocation Start =
235 Sources.getLocForStartOfFile(ID).getLocWithOffset(Offsets[i]);
// With a matching length the range ends at Start + length; otherwise (the
// single-offset case) it extends to the end of the file.
237 if (i < Lengths.size()) {
// NOTE(review): if both lists hold `unsigned` values (as the option
// declarations suggest), this sum is computed in unsigned arithmetic and
// could wrap for very large inputs before the comparison - confirm.
238 if (Offsets[i] + Lengths[i] > Code->getBufferSize()) {
239 errs() << "error: invalid length " << Lengths[i]
240 << ", offset + length (" << Offsets[i] + Lengths[i]
241 << ") is outside the file.\n";
244 End = Start.getLocWithOffset(Lengths[i]);
246 End = Sources.getLocForEndOfFile(ID);
248 unsigned Offset = Sources.getFileOffset(Start);
249 unsigned Length = Sources.getFileOffset(End) - Offset;
250 Ranges.push_back(tooling::Range(Offset, Length));
// Writes `Text` to outs(), stopping at every '\n', '\r', '<' and '&' so that
// these characters can be handled individually by the switch.
// NOTE(review): the declarations of Index/From, the switch cases and the loop
// tail are not present in this listing; presumably each case emits an XML
// escape for the character and advances From past it - confirm.
255 static void outputReplacementXML(StringRef Text) {
256 // FIXME: When we sort includes, we need to make sure the stream is correct
// (The FIXME sentence above is cut off in this listing.)
260 while ((Index = Text.find_first_of("\n\r<&", From)) != StringRef::npos) {
// Copy the run of ordinary characters that precedes the special one.
261 outs() << Text.substr(From, Index - From);
262 switch (Text[Index]) {
// find_first_of only searches for the four characters above, so any other
// value here is treated as unreachable.
276 llvm_unreachable("Unexpected character encountered!");
// Emit whatever follows the last special character.
280 outs() << Text.substr(From);
// Writes one <replacement> element per entry of `Replaces` to outs(). Offset
// and length become attributes and the replacement text becomes the element
// content, written through outputReplacementXML().
// NOTE(review): the closing braces are not present in this listing.
283 static void outputReplacementsXML(const Replacements &Replaces) {
284 for (const auto &R : Replaces) {
285 outs() << "<replacement "
286 << "offset='" << R.getOffset() << "' "
287 << "length='" << R.getLength() << "'>";
288 outputReplacementXML(R.getReplacementText());
289 outs() << "</replacement>\n";
293 // If BufStr has an invalid BOM, returns the BOM name; otherwise, returns
// (The sentence above is cut off in this listing. The caller in format()
// stores the result in a `const char *`, so the no-match result is presumably
// a null pointer - confirm.)
// NOTE(review): the result strings of the first two cases and the end of the
// StringSwitch chain (default case, return, closing brace) are not present in
// this listing.
295 static const char *getInValidBOM(StringRef BufStr) {
296 // Check to see if the buffer has a UTF Byte Order Mark (BOM).
297 // We only support UTF-8 with and without a BOM right now. See
298 // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding
299 // for more information.
300 const char *InvalidBOM =
301 llvm::StringSwitch<const char *>(BufStr)
// The first two patterns contain NUL bytes and are wrapped in withInnerNUL,
// presumably so the embedded NULs do not cut the literal short.
302 .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
// This 4-byte FF FE 00 00 pattern is listed before the 2-byte FF FE pattern
// that is its prefix; assuming StringSwitch keeps the first match, the order
// matters.
304 .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
306 .StartsWith("\xFE\xFF", "UTF-16 (BE)")
307 .StartsWith("\xFF\xFE", "UTF-16 (LE)")
308 .StartsWith("\x2B\x2F\x76", "UTF-7")
309 .StartsWith("\xF7\x64\x4C", "UTF-1")
310 .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
311 .StartsWith("\x0E\xFE\xFF", "SCSU")
312 .StartsWith("\xFB\xEE\x28", "BOCU-1")
313 .StartsWith("\x84\x31\x95\x33", "GB-18030")
// Reports each replacement in `Replaces` as a diagnostic against `Code`,
// which is registered under the name `AssumedFileName`. Diagnostics are
// printed to llvm::errs(). The final statement returns the WarningsAsErrors
// setting.
// NOTE(review): the return-type line, the body of the empty-Replaces check,
// the declaration and update of `Errors`, the start of the report call and
// several closing braces are not present in this listing.
319 emitReplacementWarnings(const Replacements &Replaces, StringRef AssumedFileName,
320 const std::unique_ptr<llvm::MemoryBuffer> &Code) {
321 if (Replaces.empty()) {
// Colours are enabled only when ShowColors is set and NoShowColors is not.
325 IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
326 DiagOpts->ShowColors = (ShowColors && !NoShowColors);
// The printer is allocated with a raw `new` and handed to the
// DiagnosticsEngine below; presumably the engine takes ownership of it -
// confirm against the DiagnosticsEngine constructor.
328 TextDiagnosticPrinter *DiagsBuffer =
329 new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts, false);
331 IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
332 IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
333 new DiagnosticsEngine(DiagID, &*DiagOpts, DiagsBuffer));
// Register the code as an in-memory file so that replacement offsets can be
// turned into source locations.
335 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
336 new llvm::vfs::InMemoryFileSystem);
337 FileManager Files(FileSystemOptions(), InMemoryFileSystem);
338 SourceManager Sources(*Diags, Files);
339 FileID FileID = createInMemoryFile(AssumedFileName, Code.get(), Sources,
340 Files, InMemoryFileSystem.get());
// One custom diagnostic ID is used for every report; its severity is Error
// when WarningsAsErrors is set and Warning otherwise.
342 const unsigned ID = Diags->getCustomDiagID(
343 WarningsAsErrors ? clang::DiagnosticsEngine::Error
344 : clang::DiagnosticsEngine::Warning,
345 "code should be clang-formatted [-Wclang-format-violations]");
348 DiagsBuffer->BeginSourceFile(LangOptions(), nullptr);
// Reporting is skipped entirely unless WarnFormat is set and NoWarnFormat is
// not.
349 if (WarnFormat && !NoWarnFormat) {
350 for (const auto &R : Replaces) {
// The diagnostic location is the replacement's offset from the file start.
352 Sources.getLocForStartOfFile(FileID).getLocWithOffset(R.getOffset()),
// A non-zero ErrorLimit caps the number of reports; 0 disables the cap.
355 if (ErrorLimit && Errors >= ErrorLimit)
359 DiagsBuffer->EndSourceFile();
// This return does not depend on how many diagnostics the loop reported.
360 return WarningsAsErrors;
// Writes the XML report to outs(): the XML declaration, a <replacements>
// element with an incomplete_format attribute, an optional <cursor> element,
// one <replacement> per entry of `Replaces`, and the closing tag.
// `FormatChanges` and `CursorPosition` are used only for the cursor element.
// NOTE(review): the line closing the <replacements> start tag, the end of the
// cursor output statement and the closing brace are not present in this
// listing.
363 static void outputXML(const Replacements &Replaces,
364 const Replacements &FormatChanges,
365 const FormattingAttemptStatus &Status,
366 const cl::opt<unsigned> &Cursor,
367 unsigned CursorPosition) {
// incomplete_format is the negation of Status.FormatComplete.
368 outs() << "<?xml version='1.0'?>\n<replacements "
369 "xml:space='preserve' incomplete_format='"
370 << (Status.FormatComplete ? "false" : "true") << "'";
// The line attribute is written only when formatting was incomplete.
371 if (!Status.FormatComplete)
372 outs() << " line='" << Status.Line << "'";
// The cursor element is written only when the cursor option occurred on the
// command line; the position is shifted by the formatting changes.
374 if (Cursor.getNumOccurrences() != 0)
375 outs() << "<cursor>" << FormatChanges.getShiftedCodePosition(CursorPosition)
378 outputReplacementsXML(Replaces);
379 outs() << "</replacements>\n";
// Formats one input. A FileName of "-" denotes stdin (see the -i error
// message below). The steps are: read the input, reject unsupported byte
// order marks, compute the ranges to format, resolve the style, sort
// includes, reformat, and then either report or apply the replacements.
// NOTE(review): many lines of this function (return statements, several `if`
// conditions, `else` branches and closing braces) are not present in this
// listing; comments that depend on them are hedged.
382 // Returns true on error.
383 static bool format(StringRef FileName) {
// In-place editing cannot write back to stdin. The check is skipped when XML
// output is requested.
384 if (!OutputXML && Inplace && FileName == "-") {
385 errs() << "error: cannot use -i when reading from stdin.\n";
388 // On Windows, overwriting a file with an open file mapping doesn't work,
389 // so read the whole file into memory when formatting in-place.
390 ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
391 !OutputXML && Inplace ? MemoryBuffer::getFileAsStream(FileName)
392 : MemoryBuffer::getFileOrSTDIN(FileName);
393 if (std::error_code EC = CodeOrErr.getError()) {
394 errs() << EC.message() << "\n";
397 std::unique_ptr<llvm::MemoryBuffer> Code = std::move(CodeOrErr.get());
398 if (Code->getBufferSize() == 0)
399 return false; // Empty files are formatted correctly.
401 StringRef BufStr = Code->getBuffer();
// NOTE(review): the condition guarding the error output below is not visible
// here; presumably it runs only when InvalidBOM is non-null - confirm.
403 const char *InvalidBOM = getInValidBOM(BufStr);
406 errs() << "error: encoding with unsupported byte order mark \""
407 << InvalidBOM << "\" detected";
409 errs() << " in file '" << FileName << "'";
414 std::vector<tooling::Range> Ranges;
415 if (fillRanges(Code.get(), Ranges))
// For stdin the assume-filename option stands in for the real file name.
417 StringRef AssumedFileName = (FileName == "-") ? AssumeFileName : FileName;
419 llvm::Expected<FormatStyle> FormatStyle =
420 getStyle(Style, AssumedFileName, FallbackStyle, Code->getBuffer());
422 llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n";
// The command-line flag overrides the style only when it was actually passed.
426 if (SortIncludes.getNumOccurrences() != 0)
427 FormatStyle->SortIncludes = SortIncludes;
// CursorPosition is passed to sortIncludes by pointer, presumably so that it
// can be adjusted for moved includes - confirm.
428 unsigned CursorPosition = Cursor;
429 Replacements Replaces = sortIncludes(*FormatStyle, Code->getBuffer(), Ranges,
430 AssumedFileName, &CursorPosition);
// Reformatting runs on the code as it looks after include sorting.
431 auto ChangedCode = tooling::applyAllReplacements(Code->getBuffer(), Replaces);
433 llvm::errs() << llvm::toString(ChangedCode.takeError()) << "\n";
436 // Get new affected ranges after sorting `#includes`.
437 Ranges = tooling::calculateRangesAfterReplacements(Replaces, Ranges);
438 FormattingAttemptStatus Status;
439 Replacements FormatChanges =
440 reformat(*FormatStyle, *ChangedCode, Ranges, AssumedFileName, &Status);
// Combine the include-sorting and reformatting replacements into one set.
441 Replaces = Replaces.merge(FormatChanges);
// Report-only modes. NOTE(review): the lines selecting between the two calls
// below are not visible; presumably DryRun leads to emitReplacementWarnings
// and OutputXML to outputXML - confirm.
442 if (OutputXML || DryRun) {
444 return emitReplacementWarnings(Replaces, AssumedFileName, Code);
446 outputXML(Replaces, FormatChanges, Status, Cursor, CursorPosition);
// Otherwise apply the replacements through a Rewriter over an in-memory copy
// of the code.
449 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
450 new llvm::vfs::InMemoryFileSystem);
451 FileManager Files(FileSystemOptions(), InMemoryFileSystem);
452 DiagnosticsEngine Diagnostics(
453 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
454 new DiagnosticOptions);
455 SourceManager Sources(Diagnostics, Files);
456 FileID ID = createInMemoryFile(AssumedFileName, Code.get(), Sources, Files,
457 InMemoryFileSystem.get());
458 Rewriter Rewrite(Sources, LangOptions());
459 tooling::applyAllReplacements(Replaces, Rewrite);
// NOTE(review): the surrounding branch structure is not visible; presumably
// overwriteChangedFiles is the in-place path and the outs() writes below are
// the stdout path - confirm.
461 if (Rewrite.overwriteChangedFiles())
// When the cursor option occurred, a JSON header with the shifted cursor
// position and the completeness status is written before the code.
464 if (Cursor.getNumOccurrences() != 0) {
465 outs() << "{ \"Cursor\": "
466 << FormatChanges.getShiftedCodePosition(CursorPosition)
467 << ", \"IncompleteFormat\": "
468 << (Status.FormatComplete ? "false" : "true");
469 if (!Status.FormatComplete)
470 outs() << ", \"Line\": " << Status.Line;
473 Rewrite.getEditBuffer(ID).write(outs());
479 } // namespace format
// Version printer registered in main() through cl::SetVersionPrinter. Writes
// the result of clang::getClangToolFullVersion("clang-format") and a newline
// to `OS`.
// NOTE(review): the closing brace is not present in this listing.
482 static void PrintVersion(raw_ostream &OS) {
483 OS << clang::getClangToolFullVersion("clang-format") << '\n';
486 // Dump the configuration.
// Resolves the style for the first input file (or for the assumed file name
// when no file is given) and prints its textual configuration to outs().
// NOTE(review): the declaration of FileName, the `else`, the check of the
// getStyle result, the return statements and the closing braces are not
// present in this listing.
487 static int dumpConfig() {
489 std::unique_ptr<llvm::MemoryBuffer> Code;
490 if (FileNames.empty()) {
491 // We can't read the code to detect the language if there's no
492 // file name, so leave Code empty here.
493 FileName = AssumeFileName;
495 // Read in the code in case the filename alone isn't enough to
496 // detect the language.
497 ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
498 MemoryBuffer::getFileOrSTDIN(FileNames[0]);
499 if (std::error_code EC = CodeOrErr.getError()) {
500 llvm::errs() << EC.message() << "\n";
// "-" means stdin, so the assumed file name is used for style lookup instead.
503 FileName = (FileNames[0] == "-") ? AssumeFileName : FileNames[0];
504 Code = std::move(CodeOrErr.get());
// An empty string is passed as the code when no buffer was read.
506 llvm::Expected<clang::format::FormatStyle> FormatStyle =
507 clang::format::getStyle(Style, FileName, FallbackStyle,
508 Code ? Code->getBuffer() : "");
510 llvm::errs() << llvm::toString(FormatStyle.takeError()) << "\n";
513 std::string Config = clang::format::configurationAsText(*FormatStyle);
514 outs() << Config << "\n";
// Tool entry point: parses the command line, then formats stdin when no file
// is given or each named file in turn. The visible returns yield exit status
// 1 if any format() call reported an error and 0 otherwise.
// NOTE(review): several lines are not present in this listing, including the
// first argument line of ParseCommandLineOptions, the conditions around
// PrintHelpMessage and the "Formatting" message, any dump-config handling,
// the declaration of `Error`, and closing braces.
518 int main(int argc, const char **argv) {
519 llvm::InitLLVM X(argc, argv);
// Hide every option that is not in this tool's category from the help output.
521 cl::HideUnrelatedOptions(ClangFormatCategory);
523 cl::SetVersionPrinter(PrintVersion);
524 cl::ParseCommandLineOptions(
526 "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.\n\n"
527 "If no arguments are specified, it formats the code from standard input\n"
528 "and writes the result to the standard output.\n"
529 "If <file>s are given, it reformats the files. If -i is specified\n"
530 "together with <file>s, the files are edited in-place. Otherwise, the\n"
531 "result is written to the standard output.\n");
534 cl::PrintHelpMessage();
// No positional arguments: format stdin, which is spelled "-".
543 if (FileNames.empty()) {
544 Error = clang::format::format("-");
545 return Error ? 1 : 0;
// The range options are rejected unless exactly one file is named.
547 if (FileNames.size() != 1 &&
548 (!Offsets.empty() || !Lengths.empty() || !LineRanges.empty())) {
549 errs() << "error: -offset, -length and -lines can only be used for "
// Errors are accumulated with |=, so the loop itself does not stop at the
// first failing file.
553 for (const auto &FileName : FileNames) {
555 errs() << "Formatting " << FileName << "\n";
556 Error |= clang::format::format(FileName);
558 return Error ? 1 : 0;