1 //===--- Replacement.cpp - Framework for clang refactoring tools ----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Implements classes to support/store refactorings.
12 //===----------------------------------------------------------------------===//
14 #include "clang/Tooling/Core/Replacement.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/DiagnosticIDs.h"
18 #include "clang/Basic/DiagnosticOptions.h"
19 #include "clang/Basic/FileManager.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Lex/Lexer.h"
22 #include "clang/Rewrite/Core/Rewriter.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/Path.h"
26 #include "llvm/Support/raw_os_ostream.h"
31 static const char * const InvalidLocation = "";
33 Replacement::Replacement() : FilePath(InvalidLocation) {}
35 Replacement::Replacement(StringRef FilePath, unsigned Offset, unsigned Length,
36 StringRef ReplacementText)
37 : FilePath(FilePath), ReplacementRange(Offset, Length),
38 ReplacementText(ReplacementText) {}
40 Replacement::Replacement(const SourceManager &Sources, SourceLocation Start,
41 unsigned Length, StringRef ReplacementText) {
42 setFromSourceLocation(Sources, Start, Length, ReplacementText);
45 Replacement::Replacement(const SourceManager &Sources,
46 const CharSourceRange &Range,
47 StringRef ReplacementText,
48 const LangOptions &LangOpts) {
49 setFromSourceRange(Sources, Range, ReplacementText, LangOpts);
52 bool Replacement::isApplicable() const {
53 return FilePath != InvalidLocation;
56 bool Replacement::apply(Rewriter &Rewrite) const {
57 SourceManager &SM = Rewrite.getSourceMgr();
58 const FileEntry *Entry = SM.getFileManager().getFile(FilePath);
62 FileID ID = SM.getOrCreateFileID(Entry, SrcMgr::C_User);
63 const SourceLocation Start =
64 SM.getLocForStartOfFile(ID).
65 getLocWithOffset(ReplacementRange.getOffset());
66 // ReplaceText returns false on success.
67 // ReplaceText only fails if the source location is not a file location, in
68 // which case we already returned false earlier.
69 bool RewriteSucceeded = !Rewrite.ReplaceText(
70 Start, ReplacementRange.getLength(), ReplacementText);
71 assert(RewriteSucceeded);
72 return RewriteSucceeded;
75 std::string Replacement::toString() const {
77 llvm::raw_string_ostream Stream(Result);
78 Stream << FilePath << ": " << ReplacementRange.getOffset() << ":+"
79 << ReplacementRange.getLength() << ":\"" << ReplacementText << "\"";
83 bool operator<(const Replacement &LHS, const Replacement &RHS) {
84 if (LHS.getOffset() != RHS.getOffset())
85 return LHS.getOffset() < RHS.getOffset();
87 if (LHS.getLength() != RHS.getLength())
88 return LHS.getLength() < RHS.getLength();
90 if (LHS.getFilePath() != RHS.getFilePath())
91 return LHS.getFilePath() < RHS.getFilePath();
92 return LHS.getReplacementText() < RHS.getReplacementText();
95 bool operator==(const Replacement &LHS, const Replacement &RHS) {
96 return LHS.getOffset() == RHS.getOffset() &&
97 LHS.getLength() == RHS.getLength() &&
98 LHS.getFilePath() == RHS.getFilePath() &&
99 LHS.getReplacementText() == RHS.getReplacementText();
102 void Replacement::setFromSourceLocation(const SourceManager &Sources,
103 SourceLocation Start, unsigned Length,
104 StringRef ReplacementText) {
105 const std::pair<FileID, unsigned> DecomposedLocation =
106 Sources.getDecomposedLoc(Start);
107 const FileEntry *Entry = Sources.getFileEntryForID(DecomposedLocation.first);
108 this->FilePath = Entry ? Entry->getName() : InvalidLocation;
109 this->ReplacementRange = Range(DecomposedLocation.second, Length);
110 this->ReplacementText = ReplacementText;
113 // FIXME: This should go into the Lexer, but we need to figure out how
114 // to handle ranges for refactoring in general first - there is no obvious
115 // good way how to integrate this into the Lexer yet.
116 static int getRangeSize(const SourceManager &Sources,
117 const CharSourceRange &Range,
118 const LangOptions &LangOpts) {
119 SourceLocation SpellingBegin = Sources.getSpellingLoc(Range.getBegin());
120 SourceLocation SpellingEnd = Sources.getSpellingLoc(Range.getEnd());
121 std::pair<FileID, unsigned> Start = Sources.getDecomposedLoc(SpellingBegin);
122 std::pair<FileID, unsigned> End = Sources.getDecomposedLoc(SpellingEnd);
123 if (Start.first != End.first) return -1;
124 if (Range.isTokenRange())
125 End.second += Lexer::MeasureTokenLength(SpellingEnd, Sources, LangOpts);
126 return End.second - Start.second;
129 void Replacement::setFromSourceRange(const SourceManager &Sources,
130 const CharSourceRange &Range,
131 StringRef ReplacementText,
132 const LangOptions &LangOpts) {
133 setFromSourceLocation(Sources, Sources.getSpellingLoc(Range.getBegin()),
134 getRangeSize(Sources, Range, LangOpts),
139 Replacements::getReplacementInChangedCode(const Replacement &R) const {
140 unsigned NewStart = getShiftedCodePosition(R.getOffset());
141 unsigned NewEnd = getShiftedCodePosition(R.getOffset() + R.getLength());
142 return Replacement(R.getFilePath(), NewStart, NewEnd - NewStart,
143 R.getReplacementText());
146 static std::string getReplacementErrString(replacement_error Err) {
148 case replacement_error::fail_to_apply:
149 return "Failed to apply a replacement.";
150 case replacement_error::wrong_file_path:
151 return "The new replacement's file path is different from the file path of "
152 "existing replacements";
153 case replacement_error::overlap_conflict:
154 return "The new replacement overlaps with an existing replacement.";
155 case replacement_error::insert_conflict:
156 return "The new insertion has the same insert location as an existing "
159 llvm_unreachable("A value of replacement_error has no message.");
162 std::string ReplacementError::message() const {
163 std::string Message = getReplacementErrString(Err);
164 if (NewReplacement.hasValue())
165 Message += "\nNew replacement: " + NewReplacement->toString();
166 if (ExistingReplacement.hasValue())
167 Message += "\nExisting replacement: " + ExistingReplacement->toString();
171 char ReplacementError::ID = 0;
173 Replacements Replacements::getCanonicalReplacements() const {
174 std::vector<Replacement> NewReplaces;
175 // Merge adjacent replacements.
176 for (const auto &R : Replaces) {
177 if (NewReplaces.empty()) {
178 NewReplaces.push_back(R);
181 auto &Prev = NewReplaces.back();
182 unsigned PrevEnd = Prev.getOffset() + Prev.getLength();
183 if (PrevEnd < R.getOffset()) {
184 NewReplaces.push_back(R);
186 assert(PrevEnd == R.getOffset() &&
187 "Existing replacements must not overlap.");
189 R.getFilePath(), Prev.getOffset(), Prev.getLength() + R.getLength(),
190 (Prev.getReplacementText() + R.getReplacementText()).str());
194 ReplacementsImpl NewReplacesImpl(NewReplaces.begin(), NewReplaces.end());
195 return Replacements(NewReplacesImpl.begin(), NewReplacesImpl.end());
198 // `R` and `Replaces` are order-independent if applying them in either order
199 // has the same effect, so we need to compare replacements associated to
200 // applying them in either order.
201 llvm::Expected<Replacements>
202 Replacements::mergeIfOrderIndependent(const Replacement &R) const {
204 // A Replacements set containg a single replacement that is `R` referring to
205 // the code after the existing replacements `Replaces` are applied.
206 Replacements RsShiftedByReplaces(getReplacementInChangedCode(R));
207 // A Replacements set that is `Replaces` referring to the code after `R` is
209 Replacements ReplacesShiftedByRs;
210 for (const auto &Replace : Replaces)
211 ReplacesShiftedByRs.Replaces.insert(
212 Rs.getReplacementInChangedCode(Replace));
213 // This is equivalent to applying `Replaces` first and then `R`.
214 auto MergeShiftedRs = merge(RsShiftedByReplaces);
215 // This is equivalent to applying `R` first and then `Replaces`.
216 auto MergeShiftedReplaces = Rs.merge(ReplacesShiftedByRs);
218 // Since empty or segmented replacements around existing replacements might be
219 // produced above, we need to compare replacements in canonical forms.
220 if (MergeShiftedRs.getCanonicalReplacements() ==
221 MergeShiftedReplaces.getCanonicalReplacements())
222 return MergeShiftedRs;
223 return llvm::make_error<ReplacementError>(replacement_error::overlap_conflict,
224 R, *Replaces.begin());
227 llvm::Error Replacements::add(const Replacement &R) {
228 // Check the file path.
229 if (!Replaces.empty() && R.getFilePath() != Replaces.begin()->getFilePath())
230 return llvm::make_error<ReplacementError>(
231 replacement_error::wrong_file_path, R, *Replaces.begin());
233 // Special-case header insertions.
234 if (R.getOffset() == UINT_MAX) {
236 return llvm::Error::success();
239 // This replacement cannot conflict with replacements that end before
240 // this replacement starts or start after this replacement ends.
241 // We also know that there currently are no overlapping replacements.
242 // Thus, we know that all replacements that start after the end of the current
243 // replacement cannot overlap.
244 Replacement AtEnd(R.getFilePath(), R.getOffset() + R.getLength(), 0, "");
246 // Find the first entry that starts after or at the end of R. Note that
247 // entries that start at the end can still be conflicting if R is an
249 auto I = Replaces.lower_bound(AtEnd);
250 // If `I` starts at the same offset as `R`, `R` must be an insertion.
251 if (I != Replaces.end() && R.getOffset() == I->getOffset()) {
252 assert(R.getLength() == 0);
253 // `I` is also an insertion, `R` and `I` conflict.
254 if (I->getLength() == 0) {
255 // Check if two insertions are order-indepedent: if inserting them in
256 // either order produces the same text, they are order-independent.
257 if ((R.getReplacementText() + I->getReplacementText()).str() !=
258 (I->getReplacementText() + R.getReplacementText()).str())
259 return llvm::make_error<ReplacementError>(
260 replacement_error::insert_conflict, R, *I);
261 // If insertions are order-independent, we can merge them.
263 R.getFilePath(), R.getOffset(), 0,
264 (R.getReplacementText() + I->getReplacementText()).str());
266 Replaces.insert(std::move(NewR));
267 return llvm::Error::success();
269 // Insertion `R` is adjacent to a non-insertion replacement `I`, so they
270 // are order-independent. It is safe to assume that `R` will not conflict
271 // with any replacement before `I` since all replacements before `I` must
272 // either end before `R` or end at `R` but has length > 0 (if the
273 // replacement before `I` is an insertion at `R`, it would have been `I`
274 // since it is a lower bound of `AtEnd` and ordered before the current `I`
277 return llvm::Error::success();
280 // `I` is the smallest iterator (after `R`) whose entry cannot overlap.
281 // If that is begin(), there are no overlaps.
282 if (I == Replaces.begin()) {
284 return llvm::Error::success();
287 auto Overlap = [](const Replacement &R1, const Replacement &R2) -> bool {
288 return Range(R1.getOffset(), R1.getLength())
289 .overlapsWith(Range(R2.getOffset(), R2.getLength()));
291 // If the previous entry does not overlap, we know that entries before it
292 // can also not overlap.
293 if (!Overlap(R, *I)) {
294 // If `R` and `I` do not have the same offset, it is safe to add `R` since
295 // it must come after `I`. Otherwise:
296 // - If `R` is an insertion, `I` must not be an insertion since it would
297 // have come after `AtEnd`.
298 // - If `R` is not an insertion, `I` must be an insertion; otherwise, `R`
299 // and `I` would have overlapped.
300 // In either case, we can safely insert `R`.
303 // `I` overlaps with `R`. We need to check `R` against all overlapping
304 // replacements to see if they are order-indepedent. If they are, merge `R`
305 // with them and replace them with the merged replacements.
307 auto MergeEnd = std::next(I);
308 while (I != Replaces.begin()) {
310 // If `I` doesn't overlap with `R`, don't merge it.
315 Replacements OverlapReplaces(MergeBegin, MergeEnd);
316 llvm::Expected<Replacements> Merged =
317 OverlapReplaces.mergeIfOrderIndependent(R);
319 return Merged.takeError();
320 Replaces.erase(MergeBegin, MergeEnd);
321 Replaces.insert(Merged->begin(), Merged->end());
323 return llvm::Error::success();
328 // Represents a merged replacement, i.e. a replacement consisting of multiple
329 // overlapping replacements from 'First' and 'Second' in mergeReplacements.
331 // Position projection:
332 // Offsets and lengths of the replacements can generally refer to two different
333 // coordinate spaces. Replacements from 'First' refer to the original text
334 // whereas replacements from 'Second' refer to the text after applying 'First'.
336 // MergedReplacement always operates in the coordinate space of the original
337 // text, i.e. transforms elements from 'Second' to take into account what was
338 // changed based on the elements from 'First'.
340 // We can correctly calculate this projection as we look at the replacements in
341 // order of strictly increasing offsets.
344 // * We always merge elements from 'First' into elements from 'Second' and vice
345 // versa. Within each set, the replacements are non-overlapping.
346 // * We only extend to the right, i.e. merge elements with strictly increasing
348 class MergedReplacement {
350 MergedReplacement(const Replacement &R, bool MergeSecond, int D)
351 : MergeSecond(MergeSecond), Delta(D), FilePath(R.getFilePath()),
352 Offset(R.getOffset() + (MergeSecond ? 0 : Delta)), Length(R.getLength()),
353 Text(R.getReplacementText()) {
354 Delta += MergeSecond ? 0 : Text.size() - Length;
355 DeltaFirst = MergeSecond ? Text.size() - Length : 0;
358 // Merges the next element 'R' into this merged element. As we always merge
359 // from 'First' into 'Second' or vice versa, the MergedReplacement knows what
360 // set the next element is coming from.
361 void merge(const Replacement &R) {
363 unsigned REnd = R.getOffset() + Delta + R.getLength();
364 unsigned End = Offset + Text.size();
366 Length += REnd - End;
369 StringRef TextRef = Text;
370 StringRef Head = TextRef.substr(0, R.getOffset() + Delta - Offset);
371 StringRef Tail = TextRef.substr(REnd - Offset);
372 Text = (Head + R.getReplacementText() + Tail).str();
373 Delta += R.getReplacementText().size() - R.getLength();
375 unsigned End = Offset + Length;
376 StringRef RText = R.getReplacementText();
377 StringRef Tail = RText.substr(End - R.getOffset());
378 Text = (Text + Tail).str();
379 if (R.getOffset() + RText.size() > End) {
380 Length = R.getOffset() + R.getLength() - Offset;
383 Length += R.getLength() - RText.size();
385 DeltaFirst += RText.size() - R.getLength();
389 // Returns 'true' if 'R' starts strictly after the MergedReplacement and thus
390 // doesn't need to be merged.
391 bool endsBefore(const Replacement &R) const {
393 return Offset + Text.size() < R.getOffset() + Delta;
394 return Offset + Length < R.getOffset();
397 // Returns 'true' if an element from the second set should be merged next.
398 bool mergeSecond() const { return MergeSecond; }
399 int deltaFirst() const { return DeltaFirst; }
400 Replacement asReplacement() const { return {FilePath, Offset, Length, Text}; }
405 // Amount of characters that elements from 'Second' need to be shifted by in
406 // order to refer to the original text.
409 // Sum of all deltas (text-length - length) of elements from 'First' merged
410 // into this element. This is used to update 'Delta' once the
411 // MergedReplacement is completed.
414 // Data of the actually merged replacement. FilePath and Offset aren't changed
415 // as the element is only extended to the right.
416 const StringRef FilePath;
417 const unsigned Offset;
424 Replacements Replacements::merge(const Replacements &ReplacesToMerge) const {
425 if (empty() || ReplacesToMerge.empty())
426 return empty() ? ReplacesToMerge : *this;
428 auto &First = Replaces;
429 auto &Second = ReplacesToMerge.Replaces;
430 // Delta is the amount of characters that replacements from 'Second' need to
431 // be shifted so that their offsets refer to the original text.
433 ReplacementsImpl Result;
435 // Iterate over both sets and always add the next element (smallest total
436 // Offset) from either 'First' or 'Second'. Merge that element with
437 // subsequent replacements as long as they overlap. See more details in the
438 // comment on MergedReplacement.
439 for (auto FirstI = First.begin(), SecondI = Second.begin();
440 FirstI != First.end() || SecondI != Second.end();) {
441 bool NextIsFirst = SecondI == Second.end() ||
442 (FirstI != First.end() &&
443 FirstI->getOffset() < SecondI->getOffset() + Delta);
444 MergedReplacement Merged(NextIsFirst ? *FirstI : *SecondI, NextIsFirst,
446 ++(NextIsFirst ? FirstI : SecondI);
448 while ((Merged.mergeSecond() && SecondI != Second.end()) ||
449 (!Merged.mergeSecond() && FirstI != First.end())) {
450 auto &I = Merged.mergeSecond() ? SecondI : FirstI;
451 if (Merged.endsBefore(*I))
456 Delta -= Merged.deltaFirst();
457 Result.insert(Merged.asReplacement());
459 return Replacements(Result.begin(), Result.end());
462 // Combines overlapping ranges in \p Ranges and sorts the combined ranges.
463 // Returns a set of non-overlapping and sorted ranges that is equivalent to
465 static std::vector<Range> combineAndSortRanges(std::vector<Range> Ranges) {
466 std::sort(Ranges.begin(), Ranges.end(),
467 [](const Range &LHS, const Range &RHS) {
468 if (LHS.getOffset() != RHS.getOffset())
469 return LHS.getOffset() < RHS.getOffset();
470 return LHS.getLength() < RHS.getLength();
472 std::vector<Range> Result;
473 for (const auto &R : Ranges) {
474 if (Result.empty() ||
475 Result.back().getOffset() + Result.back().getLength() < R.getOffset()) {
479 std::max(Result.back().getOffset() + Result.back().getLength(),
480 R.getOffset() + R.getLength());
481 Result[Result.size() - 1] =
482 Range(Result.back().getOffset(), NewEnd - Result.back().getOffset());
489 calculateRangesAfterReplacements(const Replacements &Replaces,
490 const std::vector<Range> &Ranges) {
491 // To calculate the new ranges,
492 // - Turn \p Ranges into Replacements at (offset, length) with an empty
493 // (unimportant) replacement text of length "length".
494 // - Merge with \p Replaces.
495 // - The new ranges will be the affected ranges of the merged replacements.
496 auto MergedRanges = combineAndSortRanges(Ranges);
497 if (Replaces.empty())
499 tooling::Replacements FakeReplaces;
500 for (const auto &R : MergedRanges) {
501 auto Err = FakeReplaces.add(Replacement(Replaces.begin()->getFilePath(),
502 R.getOffset(), R.getLength(),
503 std::string(R.getLength(), ' ')));
505 "Replacements must not conflict since ranges have been merged.");
508 return FakeReplaces.merge(Replaces).getAffectedRanges();
511 std::vector<Range> Replacements::getAffectedRanges() const {
512 std::vector<Range> ChangedRanges;
514 for (const Replacement &R : Replaces) {
515 unsigned Offset = R.getOffset() + Shift;
516 unsigned Length = R.getReplacementText().size();
517 Shift += Length - R.getLength();
518 ChangedRanges.push_back(Range(Offset, Length));
520 return combineAndSortRanges(ChangedRanges);
523 unsigned Replacements::getShiftedCodePosition(unsigned Position) const {
525 for (const auto& R : Replaces) {
526 if (R.getOffset() + R.getLength() <= Position) {
527 Offset += R.getReplacementText().size() - R.getLength();
530 if (R.getOffset() < Position &&
531 R.getOffset() + R.getReplacementText().size() <= Position) {
532 Position = R.getOffset() + R.getReplacementText().size();
533 if (R.getReplacementText().size() > 0)
538 return Position + Offset;
541 bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite) {
543 for (auto I = Replaces.rbegin(), E = Replaces.rend(); I != E; ++I) {
544 if (I->isApplicable()) {
545 Result = I->apply(Rewrite) && Result;
553 llvm::Expected<std::string> applyAllReplacements(StringRef Code,
554 const Replacements &Replaces) {
555 if (Replaces.empty())
558 IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
559 new vfs::InMemoryFileSystem);
560 FileManager Files(FileSystemOptions(), InMemoryFileSystem);
561 DiagnosticsEngine Diagnostics(
562 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
563 new DiagnosticOptions);
564 SourceManager SourceMgr(Diagnostics, Files);
565 Rewriter Rewrite(SourceMgr, LangOptions());
566 InMemoryFileSystem->addFile(
567 "<stdin>", 0, llvm::MemoryBuffer::getMemBuffer(Code, "<stdin>"));
568 FileID ID = SourceMgr.createFileID(Files.getFile("<stdin>"), SourceLocation(),
569 clang::SrcMgr::C_User);
570 for (auto I = Replaces.rbegin(), E = Replaces.rend(); I != E; ++I) {
571 Replacement Replace("<stdin>", I->getOffset(), I->getLength(),
572 I->getReplacementText());
573 if (!Replace.apply(Rewrite))
574 return llvm::make_error<ReplacementError>(
575 replacement_error::fail_to_apply, Replace);
578 llvm::raw_string_ostream OS(Result);
579 Rewrite.getEditBuffer(ID).write(OS);
584 std::map<std::string, Replacements> groupReplacementsByFile(
585 FileManager &FileMgr,
586 const std::map<std::string, Replacements> &FileToReplaces) {
587 std::map<std::string, Replacements> Result;
588 llvm::SmallPtrSet<const FileEntry *, 16> ProcessedFileEntries;
589 for (const auto &Entry : FileToReplaces) {
590 const FileEntry *FE = FileMgr.getFile(Entry.first);
592 llvm::errs() << "File path " << Entry.first << " is invalid.\n";
593 else if (ProcessedFileEntries.insert(FE).second)
594 Result[Entry.first] = std::move(Entry.second);
599 } // end namespace tooling
600 } // end namespace clang