1 //===--- Replacement.cpp - Framework for clang refactoring tools ----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Implements classes to support/store refactorings.
12 //===----------------------------------------------------------------------===//
14 #include "clang/Tooling/Core/Replacement.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/DiagnosticIDs.h"
18 #include "clang/Basic/DiagnosticOptions.h"
19 #include "clang/Basic/FileManager.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Lex/Lexer.h"
22 #include "clang/Rewrite/Core/Rewriter.h"
23 #include "llvm/Support/FileSystem.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/raw_os_ostream.h"
// Sentinel file path (the empty string) used to mark a Replacement that does
// not refer to a file. The default constructor stores it, isApplicable()
// compares FilePath against it, and setFromSourceLocation() falls back to it
// when no FileEntry is found.
30 static const char * const InvalidLocation = "";
// Default constructor: sets FilePath to the InvalidLocation sentinel, which
// is the value isApplicable() tests against.
32 Replacement::Replacement()
33 : FilePath(InvalidLocation) {}
// Constructs a replacement from explicit coordinates: stores FilePath, the
// range (Offset, Length) and ReplacementText as passed in. No validation is
// performed here.
35 Replacement::Replacement(StringRef FilePath, unsigned Offset, unsigned Length,
36 StringRef ReplacementText)
37 : FilePath(FilePath), ReplacementRange(Offset, Length),
38 ReplacementText(ReplacementText) {}
// Constructs a replacement for a range of size Length beginning at the source
// location Start. All members are filled in by setFromSourceLocation(), which
// resolves Start to a file path and offset via Sources.
40 Replacement::Replacement(const SourceManager &Sources, SourceLocation Start,
41 unsigned Length, StringRef ReplacementText) {
42 setFromSourceLocation(Sources, Start, Length, ReplacementText);
// Constructs a replacement covering the character/token range Range.
// All members are filled in by setFromSourceRange(), which receives Sources,
// Range, ReplacementText and LangOpts unchanged.
45 Replacement::Replacement(const SourceManager &Sources,
46 const CharSourceRange &Range,
47 StringRef ReplacementText,
48 const LangOptions &LangOpts) {
49 setFromSourceRange(Sources, Range, ReplacementText, LangOpts);
// Returns true unless FilePath compares equal to the InvalidLocation
// sentinel, i.e. unless this replacement has no file to apply to.
52 bool Replacement::isApplicable() const {
53 return FilePath != InvalidLocation;
// Applies this replacement through Rewrite.
//
// Looks FilePath up in the FileManager of the rewriter's SourceManager,
// obtains (or creates) a FileID for it, computes the start location as
// "start of file + ReplacementRange.getOffset()", and asks the rewriter to
// replace ReplacementRange.getLength() characters there with ReplacementText.
//
// Returns the negated result of Rewriter::ReplaceText, i.e. true on success.
56 bool Replacement::apply(Rewriter &Rewrite) const {
57 SourceManager &SM = Rewrite.getSourceMgr();
58 const FileEntry *Entry = SM.getFileManager().getFile(FilePath);
// NOTE(review): no check of Entry is visible in this listing before it is
// passed on below, yet the comment further down states "we already returned
// false earlier". Presumably a null-Entry early return belongs here; confirm
// against the complete file.
62 FileID ID = SM.getOrCreateFileID(Entry, SrcMgr::C_User);
63 const SourceLocation Start =
64 SM.getLocForStartOfFile(ID).
65 getLocWithOffset(ReplacementRange.getOffset());
66 // ReplaceText returns false on success.
67 // ReplaceText only fails if the source location is not a file location, in
68 // which case we already returned false earlier.
69 bool RewriteSucceeded = !Rewrite.ReplaceText(
70 Start, ReplacementRange.getLength(), ReplacementText);
// The assert documents the expectation stated above; the value is still
// returned so callers see a failure when asserts are compiled out.
71 assert(RewriteSucceeded);
72 return RewriteSucceeded;
// Renders this replacement as text in the form
//   <FilePath>: <offset>:+<length>:"<ReplacementText>"
// by streaming into an llvm::raw_string_ostream bound to Result.
// NOTE(review): the declaration of Result and the return statement are not
// present in this listing; presumably Result is a local std::string that is
// returned after the stream is flushed. Confirm.
75 std::string Replacement::toString() const {
77 llvm::raw_string_ostream Stream(Result);
78 Stream << FilePath << ": " << ReplacementRange.getOffset() << ":+"
79 << ReplacementRange.getLength() << ":\"" << ReplacementText << "\"";
83 bool operator<(const Replacement &LHS, const Replacement &RHS) {
84 if (LHS.getOffset() != RHS.getOffset())
85 return LHS.getOffset() < RHS.getOffset();
87 // Apply longer replacements first, specifically so that deletions are
88 // executed before insertions. It is (hopefully) never the intention to
89 // delete parts of newly inserted code.
90 if (LHS.getLength() != RHS.getLength())
91 return LHS.getLength() > RHS.getLength();
93 if (LHS.getFilePath() != RHS.getFilePath())
94 return LHS.getFilePath() < RHS.getFilePath();
95 return LHS.getReplacementText() < RHS.getReplacementText();
98 bool operator==(const Replacement &LHS, const Replacement &RHS) {
99 return LHS.getOffset() == RHS.getOffset() &&
100 LHS.getLength() == RHS.getLength() &&
101 LHS.getFilePath() == RHS.getFilePath() &&
102 LHS.getReplacementText() == RHS.getReplacementText();
105 void Replacement::setFromSourceLocation(const SourceManager &Sources,
106 SourceLocation Start, unsigned Length,
107 StringRef ReplacementText) {
108 const std::pair<FileID, unsigned> DecomposedLocation =
109 Sources.getDecomposedLoc(Start);
110 const FileEntry *Entry = Sources.getFileEntryForID(DecomposedLocation.first);
111 this->FilePath = Entry ? Entry->getName() : InvalidLocation;
112 this->ReplacementRange = Range(DecomposedLocation.second, Length);
113 this->ReplacementText = ReplacementText;
116 // FIXME: This should go into the Lexer, but we need to figure out how
117 // to handle ranges for refactoring in general first - there is no obvious
118 // good way how to integrate this into the Lexer yet.
119 static int getRangeSize(const SourceManager &Sources,
120 const CharSourceRange &Range,
121 const LangOptions &LangOpts) {
122 SourceLocation SpellingBegin = Sources.getSpellingLoc(Range.getBegin());
123 SourceLocation SpellingEnd = Sources.getSpellingLoc(Range.getEnd());
124 std::pair<FileID, unsigned> Start = Sources.getDecomposedLoc(SpellingBegin);
125 std::pair<FileID, unsigned> End = Sources.getDecomposedLoc(SpellingEnd);
126 if (Start.first != End.first) return -1;
127 if (Range.isTokenRange())
128 End.second += Lexer::MeasureTokenLength(SpellingEnd, Sources, LangOpts);
129 return End.second - Start.second;
// Fills in all members from a source range by delegating to
// setFromSourceLocation(): the start is the spelling location of
// Range.getBegin() and the length is getRangeSize(Sources, Range, LangOpts).
// NOTE(review): getRangeSize() returns int and yields -1 for a range whose
// ends are in different files, while setFromSourceLocation() takes an
// unsigned Length - confirm how callers expect that case to be handled.
// NOTE(review): the final argument line of the call (presumably
// ReplacementText) is not present in this listing.
132 void Replacement::setFromSourceRange(const SourceManager &Sources,
133 const CharSourceRange &Range,
134 StringRef ReplacementText,
135 const LangOptions &LangOpts) {
136 setFromSourceLocation(Sources, Sources.getSpellingLoc(Range.getBegin()),
137 getRangeSize(Sources, Range, LangOpts),
/// Maps \p Position, an offset in the original text, to the corresponding
/// offset in the text produced by applying \p Replaces.
///
/// \param Replaces Range of replacement-like objects providing getOffset(),
///        getLength() and getReplacementText(). The loop stops at the first
///        element that does not end at or before \p Position, so elements are
///        expected in ascending offset order.
/// \param Position Offset in the original text.
/// \returns The shifted offset. A position that falls inside a replaced range
///          and beyond the end of its replacement text is clamped to the last
///          character of that text, or to the start of the replacement when
///          the text is empty (pure deletion).
template <typename T>
unsigned shiftedCodePositionInternal(const T &Replaces, unsigned Position) {
  // Net size change of all replacements that end at or before Position.
  // Deliberately unsigned: a shrinking replacement wraps around here and the
  // final addition wraps back (arithmetic modulo 2^N), giving the right sum.
  unsigned Offset = 0;
  for (const auto &R : Replaces) {
    if (R.getOffset() + R.getLength() <= Position) {
      // Entirely before Position: only its size delta matters.
      Offset += R.getReplacementText().size() - R.getLength();
      continue;
    }
    if (R.getOffset() < Position &&
        R.getOffset() + R.getReplacementText().size() <= Position) {
      // Position lies in the replaced range past the replacement text; clamp
      // it. Only step back onto the last character if there is one:
      // subtracting 1 for an empty text is off by one, and wraps around when
      // the replacement starts at offset 0.
      Position = R.getOffset() + R.getReplacementText().size();
      if (!R.getReplacementText().empty())
        --Position;
    }
    break;
  }
  return Position + Offset;
}
// Returns Position shifted to account for Replaces. Thin overload for the
// Replacements container; forwards to shiftedCodePositionInternal().
158 unsigned shiftedCodePosition(const Replacements &Replaces, unsigned Position) {
159 return shiftedCodePositionInternal(Replaces, Position);
// Overload of shiftedCodePosition() taking a std::vector<Replacement>;
// forwards to shiftedCodePositionInternal().
// NOTE(review): the second parameter line (the declaration of Position) is
// not present in this listing.
162 // FIXME: Remove this function when Replacements is implemented as std::vector
163 // instead of std::set.
164 unsigned shiftedCodePosition(const std::vector<Replacement> &Replaces,
166 return shiftedCodePositionInternal(Replaces, Position);
// Removes duplicate entries from Replaces and reports overlapping ones.
//
// Replaces is sorted in place by (offset, length, replacement text) and
// entries equal in those three fields are erased; file paths are ignored by
// both comparators. A second pass walks the sorted entries, grows a running
// ConflictRange while consecutive entries overlap it, and appends
// Range(ConflictStart, ConflictLength) to Conflicts whenever ConflictLength
// exceeds 1.
//
// NOTE(review): several lines are absent from this listing - the body of the
// empty-input check, the lambda terminators, the end argument of erase(), the
// branch structure of the loop and every update of ConflictStart and
// ConflictLength. Presumably those two hold an index into Replaces and a
// count of entries (not file offsets); confirm against the complete file.
169 void deduplicate(std::vector<Replacement> &Replaces,
170 std::vector<Range> &Conflicts) {
171 if (Replaces.empty())
// Orders by offset, then length (ascending here, unlike operator<), then
// replacement text.
174 auto LessNoPath = [](const Replacement &LHS, const Replacement &RHS) {
175 if (LHS.getOffset() != RHS.getOffset())
176 return LHS.getOffset() < RHS.getOffset();
177 if (LHS.getLength() != RHS.getLength())
178 return LHS.getLength() < RHS.getLength();
179 return LHS.getReplacementText() < RHS.getReplacementText();
// Equality on the same three fields, so std::unique removes exactly the
// neighbours LessNoPath cannot tell apart.
182 auto EqualNoPath = [](const Replacement &LHS, const Replacement &RHS) {
183 return LHS.getOffset() == RHS.getOffset() &&
184 LHS.getLength() == RHS.getLength() &&
185 LHS.getReplacementText() == RHS.getReplacementText();
188 // Deduplicate. We don't want to deduplicate based on the path as we assume
189 // that all replacements refer to the same file (or are symlinks).
190 std::sort(Replaces.begin(), Replaces.end(), LessNoPath);
191 Replaces.erase(std::unique(Replaces.begin(), Replaces.end(), EqualNoPath),
// Seed the running conflict range with the first entry; front() is valid
// because of the emptiness check at the top.
195 Range ConflictRange(Replaces.front().getOffset(),
196 Replaces.front().getLength());
197 unsigned ConflictStart = 0;
198 unsigned ConflictLength = 1;
199 for (unsigned i = 1; i < Replaces.size(); ++i) {
200 Range Current(Replaces[i].getOffset(), Replaces[i].getLength());
201 if (ConflictRange.overlapsWith(Current)) {
202 // Extend conflicted range
// Keeps the start and makes the length reach the further of the two ends.
203 ConflictRange = Range(ConflictRange.getOffset(),
204 std::max(ConflictRange.getLength(),
205 Current.getOffset() + Current.getLength() -
206 ConflictRange.getOffset()));
209 if (ConflictLength > 1)
210 Conflicts.push_back(Range(ConflictStart, ConflictLength));
211 ConflictRange = Current;
// Report a conflict still pending when the loop ends.
217 if (ConflictLength > 1)
218 Conflicts.push_back(Range(ConflictStart, ConflictLength));
// Applies each applicable replacement in Replaces through Rewrite.
// A failing apply() clears Result, but apply() is still invoked for later
// entries because it is the left operand of the &&.
// NOTE(review): the declaration of Result, the rest of the loop header, the
// handling of non-applicable entries and the return statement are not present
// in this listing; confirm them against the complete file.
221 bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite) {
223 for (Replacements::const_iterator I = Replaces.begin(),
226 if (I->isApplicable()) {
227 Result = I->apply(Rewrite) && Result;
// std::vector<Replacement> overload: applies each applicable replacement in
// Replaces. A failing apply() clears Result, but apply() is still invoked for
// later entries because it is the left operand of the &&.
// NOTE(review): the second parameter, the declaration of Result, the rest of
// the loop header, the handling of non-applicable entries and the return
// statement are not present in this listing.
235 // FIXME: Remove this function when Replacements is implemented as std::vector
236 // instead of std::set.
237 bool applyAllReplacements(const std::vector<Replacement> &Replaces,
240 for (std::vector<Replacement>::const_iterator I = Replaces.begin(),
243 if (I->isApplicable()) {
244 Result = I->apply(Rewrite) && Result;
// Applies Replaces to the in-memory text Code and yields the rewritten text,
// or an llvm::StringError naming the first replacement that failed to apply.
//
// The text is registered as a file named "<stdin>" in an in-memory file
// system, and a throw-away FileManager / DiagnosticsEngine / SourceManager /
// Rewriter stack is built on top of it. Each replacement is re-targeted at
// "<stdin>" (its own file path is ignored) before being applied.
//
// NOTE(review): the statement executed for an empty Replaces, the rest of the
// loop header, the declaration of Result and the final return are not present
// in this listing.
252 llvm::Expected<std::string> applyAllReplacements(StringRef Code,
253 const Replacements &Replaces) {
254 if (Replaces.empty())
257 IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
258 new vfs::InMemoryFileSystem);
259 FileManager Files(FileSystemOptions(), InMemoryFileSystem);
260 DiagnosticsEngine Diagnostics(
261 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
262 new DiagnosticOptions);
263 SourceManager SourceMgr(Diagnostics, Files);
264 Rewriter Rewrite(SourceMgr, LangOptions());
// Expose Code under the fixed name "<stdin>".
265 InMemoryFileSystem->addFile(
266 "<stdin>", 0, llvm::MemoryBuffer::getMemBuffer(Code, "<stdin>"));
267 FileID ID = SourceMgr.createFileID(Files.getFile("<stdin>"), SourceLocation(),
268 clang::SrcMgr::C_User);
269 for (Replacements::const_iterator I = Replaces.begin(), E = Replaces.end();
// Copy offset, length and text, but point the replacement at "<stdin>".
271 Replacement Replace("<stdin>", I->getOffset(), I->getLength(),
272 I->getReplacementText());
273 if (!Replace.apply(Rewrite))
274 return llvm::make_error<llvm::StringError>(
275 "Failed to apply replacement: " + Replace.toString(),
276 llvm::inconvertibleErrorCode());
// Serialize the rewritten buffer of the "<stdin>" file into Result.
279 llvm::raw_string_ostream OS(Result);
280 Rewrite.getEditBuffer(ID).write(OS);
// Takes Ranges by value, sorts the copy by (offset, length) ascending and
// folds it into Result in one pass.
// NOTE(review): the branch bodies are only partly present in this listing.
// Presumably a range is appended when Result is empty or the range starts
// strictly after the end of Result.back(), and otherwise Result.back() is
// widened to NewEnd, which would also merge ranges that merely touch.
// The declaration of NewEnd and the return statement are not visible.
285 // Merge and sort overlapping ranges in \p Ranges.
286 static std::vector<Range> mergeAndSortRanges(std::vector<Range> Ranges) {
287 std::sort(Ranges.begin(), Ranges.end(),
288 [](const Range &LHS, const Range &RHS) {
289 if (LHS.getOffset() != RHS.getOffset())
290 return LHS.getOffset() < RHS.getOffset();
291 return LHS.getLength() < RHS.getLength();
293 std::vector<Range> Result;
294 for (const auto &R : Ranges) {
295 if (Result.empty() ||
296 Result.back().getOffset() + Result.back().getLength() < R.getOffset()) {
// The further of the two ends becomes the end of the widened range.
300 std::max(Result.back().getOffset() + Result.back().getLength(),
301 R.getOffset() + R.getLength());
302 Result[Result.size() - 1] =
303 Range(Result.back().getOffset(), NewEnd - Result.back().getOffset());
// Computes, in post-replacement coordinates, the ranges occupied by the
// replacement texts of Replaces.
//
// Shift accumulates the size change (new text size minus replaced length) of
// the replacements visited so far. Each replacement contributes
// Range(original offset + Shift, new text size), and the collected ranges are
// passed through mergeAndSortRanges().
// NOTE(review): the declaration and initial value of Shift are not present
// in this listing.
309 std::vector<Range> calculateChangedRanges(const Replacements &Replaces) {
310 std::vector<Range> ChangedRanges;
312 for (const Replacement &R : Replaces) {
313 unsigned Offset = R.getOffset() + Shift;
314 unsigned Length = R.getReplacementText().size();
315 Shift += Length - R.getLength();
316 ChangedRanges.push_back(Range(Offset, Length));
318 return mergeAndSortRanges(ChangedRanges);
322 calculateRangesAfterReplacements(const Replacements &Replaces,
323 const std::vector<Range> &Ranges) {
324 auto MergedRanges = mergeAndSortRanges(Ranges);
325 tooling::Replacements FakeReplaces;
326 for (const auto &R : MergedRanges)
327 FakeReplaces.insert(Replacement(Replaces.begin()->getFilePath(),
328 R.getOffset(), R.getLength(),
329 std::string(R.getLength(), ' ')));
330 tooling::Replacements NewReplaces = mergeReplacements(FakeReplaces, Replaces);
331 return calculateChangedRanges(NewReplaces);
// NOTE(review): this listing of the class is incomplete - the access
// specifiers, the conditions selecting between the two halves of merge() and
// endsBefore(), and the declarations of MergeSecond, Delta, DeltaFirst, Length
// and Text are not present. The added comments describe only what the visible
// lines show.
335 // Represents a merged replacement, i.e. a replacement consisting of multiple
336 // overlapping replacements from 'First' and 'Second' in mergeReplacements.
338 // Position projection:
339 // Offsets and lengths of the replacements can generally refer to two different
340 // coordinate spaces. Replacements from 'First' refer to the original text
341 // whereas replacements from 'Second' refer to the text after applying 'First'.
343 // MergedReplacement always operates in the coordinate space of the original
344 // text, i.e. transforms elements from 'Second' to take into account what was
345 // changed based on the elements from 'First'.
347 // We can correctly calculate this projection as we look at the replacements in
348 // order of strictly increasing offsets.
351 // * We always merge elements from 'First' into elements from 'Second' and vice
352 // versa. Within each set, the replacements are non-overlapping.
353 // * We only extend to the right, i.e. merge elements with strictly increasing
355 class MergedReplacement {
// Seeds the merged element from R. MergeSecond tells which set the elements
// merged next come from. When it is false, R's offset is shifted by the
// incoming delta D and Delta is advanced by R's own size change. When it is
// true, that size change is recorded in DeltaFirst instead.
357 MergedReplacement(const Replacement &R, bool MergeSecond, int D)
358 : MergeSecond(MergeSecond), Delta(D), FilePath(R.getFilePath()),
359 Offset(R.getOffset() + (MergeSecond ? 0 : Delta)), Length(R.getLength()),
360 Text(R.getReplacementText()) {
361 Delta += MergeSecond ? 0 : Text.size() - Length;
362 DeltaFirst = MergeSecond ? Text.size() - Length : 0;
365 // Merges the next element 'R' into this merged element. As we always merge
366 // from 'First' into 'Second' or vice versa, the MergedReplacement knows what
367 // set the next element is coming from.
368 void merge(const Replacement &R) {
// First half: R's offset is shifted by Delta, then R's text is spliced into
// Text between Head and Tail. Length grows by the part of R that reaches past
// the current end (NOTE(review): presumably only when REnd > End; the guard
// is not visible).
370 unsigned REnd = R.getOffset() + Delta + R.getLength();
371 unsigned End = Offset + Text.size();
373 Length += REnd - End;
376 StringRef TextRef = Text;
377 StringRef Head = TextRef.substr(0, R.getOffset() + Delta - Offset);
378 StringRef Tail = TextRef.substr(REnd - Offset);
379 Text = (Head + R.getReplacementText() + Tail).str();
380 Delta += R.getReplacementText().size() - R.getLength();
// Second half: R's offset is used unshifted. The part of R's text lying past
// the current end is appended to Text, Length is adjusted, and R's size change
// is added to DeltaFirst.
382 unsigned End = Offset + Length;
383 StringRef RText = R.getReplacementText();
384 StringRef Tail = RText.substr(End - R.getOffset());
385 Text = (Text + Tail).str();
386 if (R.getOffset() + RText.size() > End) {
387 Length = R.getOffset() + R.getLength() - Offset;
390 Length += R.getLength() - RText.size();
392 DeltaFirst += RText.size() - R.getLength();
396 // Returns 'true' if 'R' starts strictly after the MergedReplacement and thus
397 // doesn't need to be merged.
398 bool endsBefore(const Replacement &R) const {
// Two comparisons: one against the end of the replacement text with R's
// offset shifted by Delta, one against Offset + Length with R's offset
// unshifted. NOTE(review): the condition choosing between them is not
// visible; presumably it is MergeSecond.
400 return Offset + Text.size() < R.getOffset() + Delta;
401 return Offset + Length < R.getOffset();
404 // Returns 'true' if an element from the second set should be merged next.
405 bool mergeSecond() const { return MergeSecond; }
// Accumulated size change recorded in DeltaFirst.
406 int deltaFirst() const { return DeltaFirst; }
// Packs the merged state into a Replacement.
407 Replacement asReplacement() const { return {FilePath, Offset, Length, Text}; }
412 // Amount of characters that elements from 'Second' need to be shifted by in
413 // order to refer to the original text.
416 // Sum of all deltas (text-length - length) of elements from 'First' merged
417 // into this element. This is used to update 'Delta' once the
418 // MergedReplacement is completed.
421 // Data of the actually merged replacement. FilePath and Offset aren't changed
422 // as the element is only extended to the right.
423 const StringRef FilePath;
424 const unsigned Offset;
430 std::map<std::string, Replacements>
431 groupReplacementsByFile(const Replacements &Replaces) {
432 std::map<std::string, Replacements> FileToReplaces;
433 for (const auto &Replace : Replaces) {
434 FileToReplaces[Replace.getFilePath()].insert(Replace);
436 return FileToReplaces;
// Combines First and Second into a single set of replacements. The existing
// comments below state that Second's offsets must be shifted by Delta to
// refer to the original text.
//
// If either set is empty the other one is returned unchanged. Otherwise both
// sets are walked in parallel: the element with the smaller offset (Second's
// offset taken plus Delta) seeds a MergedReplacement, elements of the other
// set are considered for merging until endsBefore() reports one that starts
// after it, and the merged element is inserted into Result.
//
// NOTE(review): the declarations of Delta and Result, the last constructor
// argument of Merged, the body of the inner loop after the endsBefore() check
// and the return statement are not present in this listing.
439 Replacements mergeReplacements(const Replacements &First,
440 const Replacements &Second) {
441 if (First.empty() || Second.empty())
442 return First.empty() ? Second : First;
444 // Delta is the amount of characters that replacements from 'Second' need to
445 // be shifted so that their offsets refer to the original text.
449 // Iterate over both sets and always add the next element (smallest total
450 // Offset) from either 'First' or 'Second'. Merge that element with
451 // subsequent replacements as long as they overlap. See more details in the
452 // comment on MergedReplacement.
453 for (auto FirstI = First.begin(), SecondI = Second.begin();
454 FirstI != First.end() || SecondI != Second.end();) {
// Take from First when Second is exhausted, or when First's next offset is
// strictly smaller than Second's shifted offset; ties go to Second.
455 bool NextIsFirst = SecondI == Second.end() ||
456 (FirstI != First.end() &&
457 FirstI->getOffset() < SecondI->getOffset() + Delta);
458 MergedReplacement Merged(NextIsFirst ? *FirstI : *SecondI, NextIsFirst,
// Advance whichever iterator supplied the seed element.
460 ++(NextIsFirst ? FirstI : SecondI);
// Keep looking at the *other* set while it still has elements.
462 while ((Merged.mergeSecond() && SecondI != Second.end()) ||
463 (!Merged.mergeSecond() && FirstI != First.end())) {
464 auto &I = Merged.mergeSecond() ? SecondI : FirstI;
465 if (Merged.endsBefore(*I))
// Subtract the merged element's deltaFirst() from the running Delta before
// storing the merged element.
470 Delta -= Merged.deltaFirst();
471 Result.insert(Merged.asReplacement());
476 } // end namespace tooling
477 } // end namespace clang