1 //===- Replacement.cpp - Framework for clang refactoring tools ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Implements classes to support/store refactorings.
12 //===----------------------------------------------------------------------===//
14 #include "clang/Tooling/Core/Replacement.h"
15 #include "clang/Basic/Diagnostic.h"
16 #include "clang/Basic/DiagnosticIDs.h"
17 #include "clang/Basic/DiagnosticOptions.h"
18 #include "clang/Basic/FileManager.h"
19 #include "clang/Basic/FileSystemOptions.h"
20 #include "clang/Basic/SourceLocation.h"
21 #include "clang/Basic/SourceManager.h"
22 #include "clang/Lex/Lexer.h"
23 #include "clang/Rewrite/Core/RewriteBuffer.h"
24 #include "clang/Rewrite/Core/Rewriter.h"
25 #include "llvm/ADT/IntrusiveRefCntPtr.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Support/Error.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/MemoryBuffer.h"
31 #include "llvm/Support/VirtualFileSystem.h"
32 #include "llvm/Support/raw_ostream.h"
41 using namespace clang;
42 using namespace tooling;
/// Sentinel file path (the empty string) marking a Replacement that does not
/// refer to any file.
static const char *const InvalidLocation = "";
46 Replacement::Replacement() : FilePath(InvalidLocation) {}
48 Replacement::Replacement(StringRef FilePath, unsigned Offset, unsigned Length,
49 StringRef ReplacementText)
50 : FilePath(FilePath), ReplacementRange(Offset, Length),
51 ReplacementText(ReplacementText) {}
53 Replacement::Replacement(const SourceManager &Sources, SourceLocation Start,
54 unsigned Length, StringRef ReplacementText) {
55 setFromSourceLocation(Sources, Start, Length, ReplacementText);
58 Replacement::Replacement(const SourceManager &Sources,
59 const CharSourceRange &Range,
60 StringRef ReplacementText,
61 const LangOptions &LangOpts) {
62 setFromSourceRange(Sources, Range, ReplacementText, LangOpts);
65 bool Replacement::isApplicable() const {
66 return FilePath != InvalidLocation;
69 bool Replacement::apply(Rewriter &Rewrite) const {
70 SourceManager &SM = Rewrite.getSourceMgr();
71 const FileEntry *Entry = SM.getFileManager().getFile(FilePath);
75 FileID ID = SM.getOrCreateFileID(Entry, SrcMgr::C_User);
76 const SourceLocation Start =
77 SM.getLocForStartOfFile(ID).
78 getLocWithOffset(ReplacementRange.getOffset());
79 // ReplaceText returns false on success.
80 // ReplaceText only fails if the source location is not a file location, in
81 // which case we already returned false earlier.
82 bool RewriteSucceeded = !Rewrite.ReplaceText(
83 Start, ReplacementRange.getLength(), ReplacementText);
84 assert(RewriteSucceeded);
85 return RewriteSucceeded;
88 std::string Replacement::toString() const {
90 llvm::raw_string_ostream Stream(Result);
91 Stream << FilePath << ": " << ReplacementRange.getOffset() << ":+"
92 << ReplacementRange.getLength() << ":\"" << ReplacementText << "\"";
99 bool operator<(const Replacement &LHS, const Replacement &RHS) {
100 if (LHS.getOffset() != RHS.getOffset())
101 return LHS.getOffset() < RHS.getOffset();
103 if (LHS.getLength() != RHS.getLength())
104 return LHS.getLength() < RHS.getLength();
106 if (LHS.getFilePath() != RHS.getFilePath())
107 return LHS.getFilePath() < RHS.getFilePath();
108 return LHS.getReplacementText() < RHS.getReplacementText();
111 bool operator==(const Replacement &LHS, const Replacement &RHS) {
112 return LHS.getOffset() == RHS.getOffset() &&
113 LHS.getLength() == RHS.getLength() &&
114 LHS.getFilePath() == RHS.getFilePath() &&
115 LHS.getReplacementText() == RHS.getReplacementText();
118 } // namespace tooling
121 void Replacement::setFromSourceLocation(const SourceManager &Sources,
122 SourceLocation Start, unsigned Length,
123 StringRef ReplacementText) {
124 const std::pair<FileID, unsigned> DecomposedLocation =
125 Sources.getDecomposedLoc(Start);
126 const FileEntry *Entry = Sources.getFileEntryForID(DecomposedLocation.first);
127 this->FilePath = Entry ? Entry->getName() : InvalidLocation;
128 this->ReplacementRange = Range(DecomposedLocation.second, Length);
129 this->ReplacementText = ReplacementText;
132 // FIXME: This should go into the Lexer, but we need to figure out how
133 // to handle ranges for refactoring in general first - there is no obvious
134 // good way how to integrate this into the Lexer yet.
135 static int getRangeSize(const SourceManager &Sources,
136 const CharSourceRange &Range,
137 const LangOptions &LangOpts) {
138 SourceLocation SpellingBegin = Sources.getSpellingLoc(Range.getBegin());
139 SourceLocation SpellingEnd = Sources.getSpellingLoc(Range.getEnd());
140 std::pair<FileID, unsigned> Start = Sources.getDecomposedLoc(SpellingBegin);
141 std::pair<FileID, unsigned> End = Sources.getDecomposedLoc(SpellingEnd);
142 if (Start.first != End.first) return -1;
143 if (Range.isTokenRange())
144 End.second += Lexer::MeasureTokenLength(SpellingEnd, Sources, LangOpts);
145 return End.second - Start.second;
148 void Replacement::setFromSourceRange(const SourceManager &Sources,
149 const CharSourceRange &Range,
150 StringRef ReplacementText,
151 const LangOptions &LangOpts) {
152 setFromSourceLocation(Sources, Sources.getSpellingLoc(Range.getBegin()),
153 getRangeSize(Sources, Range, LangOpts),
158 Replacements::getReplacementInChangedCode(const Replacement &R) const {
159 unsigned NewStart = getShiftedCodePosition(R.getOffset());
160 unsigned NewEnd = getShiftedCodePosition(R.getOffset() + R.getLength());
161 return Replacement(R.getFilePath(), NewStart, NewEnd - NewStart,
162 R.getReplacementText());
165 static std::string getReplacementErrString(replacement_error Err) {
167 case replacement_error::fail_to_apply:
168 return "Failed to apply a replacement.";
169 case replacement_error::wrong_file_path:
170 return "The new replacement's file path is different from the file path of "
171 "existing replacements";
172 case replacement_error::overlap_conflict:
173 return "The new replacement overlaps with an existing replacement.";
174 case replacement_error::insert_conflict:
175 return "The new insertion has the same insert location as an existing "
178 llvm_unreachable("A value of replacement_error has no message.");
181 std::string ReplacementError::message() const {
182 std::string Message = getReplacementErrString(Err);
183 if (NewReplacement.hasValue())
184 Message += "\nNew replacement: " + NewReplacement->toString();
185 if (ExistingReplacement.hasValue())
186 Message += "\nExisting replacement: " + ExistingReplacement->toString();
190 char ReplacementError::ID = 0;
192 Replacements Replacements::getCanonicalReplacements() const {
193 std::vector<Replacement> NewReplaces;
194 // Merge adjacent replacements.
195 for (const auto &R : Replaces) {
196 if (NewReplaces.empty()) {
197 NewReplaces.push_back(R);
200 auto &Prev = NewReplaces.back();
201 unsigned PrevEnd = Prev.getOffset() + Prev.getLength();
202 if (PrevEnd < R.getOffset()) {
203 NewReplaces.push_back(R);
205 assert(PrevEnd == R.getOffset() &&
206 "Existing replacements must not overlap.");
208 R.getFilePath(), Prev.getOffset(), Prev.getLength() + R.getLength(),
209 (Prev.getReplacementText() + R.getReplacementText()).str());
213 ReplacementsImpl NewReplacesImpl(NewReplaces.begin(), NewReplaces.end());
214 return Replacements(NewReplacesImpl.begin(), NewReplacesImpl.end());
217 // `R` and `Replaces` are order-independent if applying them in either order
218 // has the same effect, so we need to compare replacements associated to
219 // applying them in either order.
220 llvm::Expected<Replacements>
221 Replacements::mergeIfOrderIndependent(const Replacement &R) const {
223 // A Replacements set containing a single replacement that is `R` referring to
224 // the code after the existing replacements `Replaces` are applied.
225 Replacements RsShiftedByReplaces(getReplacementInChangedCode(R));
226 // A Replacements set that is `Replaces` referring to the code after `R` is
228 Replacements ReplacesShiftedByRs;
229 for (const auto &Replace : Replaces)
230 ReplacesShiftedByRs.Replaces.insert(
231 Rs.getReplacementInChangedCode(Replace));
232 // This is equivalent to applying `Replaces` first and then `R`.
233 auto MergeShiftedRs = merge(RsShiftedByReplaces);
234 // This is equivalent to applying `R` first and then `Replaces`.
235 auto MergeShiftedReplaces = Rs.merge(ReplacesShiftedByRs);
237 // Since empty or segmented replacements around existing replacements might be
238 // produced above, we need to compare replacements in canonical forms.
239 if (MergeShiftedRs.getCanonicalReplacements() ==
240 MergeShiftedReplaces.getCanonicalReplacements())
241 return MergeShiftedRs;
242 return llvm::make_error<ReplacementError>(replacement_error::overlap_conflict,
243 R, *Replaces.begin());
246 llvm::Error Replacements::add(const Replacement &R) {
247 // Check the file path.
248 if (!Replaces.empty() && R.getFilePath() != Replaces.begin()->getFilePath())
249 return llvm::make_error<ReplacementError>(
250 replacement_error::wrong_file_path, R, *Replaces.begin());
252 // Special-case header insertions.
253 if (R.getOffset() == std::numeric_limits<unsigned>::max()) {
255 return llvm::Error::success();
258 // This replacement cannot conflict with replacements that end before
259 // this replacement starts or start after this replacement ends.
260 // We also know that there currently are no overlapping replacements.
261 // Thus, we know that all replacements that start after the end of the current
262 // replacement cannot overlap.
263 Replacement AtEnd(R.getFilePath(), R.getOffset() + R.getLength(), 0, "");
265 // Find the first entry that starts after or at the end of R. Note that
266 // entries that start at the end can still be conflicting if R is an
268 auto I = Replaces.lower_bound(AtEnd);
269 // If `I` starts at the same offset as `R`, `R` must be an insertion.
270 if (I != Replaces.end() && R.getOffset() == I->getOffset()) {
271 assert(R.getLength() == 0);
272 // `I` is also an insertion, `R` and `I` conflict.
273 if (I->getLength() == 0) {
274 // Check if two insertions are order-indepedent: if inserting them in
275 // either order produces the same text, they are order-independent.
276 if ((R.getReplacementText() + I->getReplacementText()).str() !=
277 (I->getReplacementText() + R.getReplacementText()).str())
278 return llvm::make_error<ReplacementError>(
279 replacement_error::insert_conflict, R, *I);
280 // If insertions are order-independent, we can merge them.
282 R.getFilePath(), R.getOffset(), 0,
283 (R.getReplacementText() + I->getReplacementText()).str());
285 Replaces.insert(std::move(NewR));
286 return llvm::Error::success();
288 // Insertion `R` is adjacent to a non-insertion replacement `I`, so they
289 // are order-independent. It is safe to assume that `R` will not conflict
290 // with any replacement before `I` since all replacements before `I` must
291 // either end before `R` or end at `R` but has length > 0 (if the
292 // replacement before `I` is an insertion at `R`, it would have been `I`
293 // since it is a lower bound of `AtEnd` and ordered before the current `I`
296 return llvm::Error::success();
299 // `I` is the smallest iterator (after `R`) whose entry cannot overlap.
300 // If that is begin(), there are no overlaps.
301 if (I == Replaces.begin()) {
303 return llvm::Error::success();
306 auto Overlap = [](const Replacement &R1, const Replacement &R2) -> bool {
307 return Range(R1.getOffset(), R1.getLength())
308 .overlapsWith(Range(R2.getOffset(), R2.getLength()));
310 // If the previous entry does not overlap, we know that entries before it
311 // can also not overlap.
312 if (!Overlap(R, *I)) {
313 // If `R` and `I` do not have the same offset, it is safe to add `R` since
314 // it must come after `I`. Otherwise:
315 // - If `R` is an insertion, `I` must not be an insertion since it would
316 // have come after `AtEnd`.
317 // - If `R` is not an insertion, `I` must be an insertion; otherwise, `R`
318 // and `I` would have overlapped.
319 // In either case, we can safely insert `R`.
322 // `I` overlaps with `R`. We need to check `R` against all overlapping
323 // replacements to see if they are order-indepedent. If they are, merge `R`
324 // with them and replace them with the merged replacements.
326 auto MergeEnd = std::next(I);
327 while (I != Replaces.begin()) {
329 // If `I` doesn't overlap with `R`, don't merge it.
334 Replacements OverlapReplaces(MergeBegin, MergeEnd);
335 llvm::Expected<Replacements> Merged =
336 OverlapReplaces.mergeIfOrderIndependent(R);
338 return Merged.takeError();
339 Replaces.erase(MergeBegin, MergeEnd);
340 Replaces.insert(Merged->begin(), Merged->end());
342 return llvm::Error::success();
347 // Represents a merged replacement, i.e. a replacement consisting of multiple
348 // overlapping replacements from 'First' and 'Second' in mergeReplacements.
350 // Position projection:
351 // Offsets and lengths of the replacements can generally refer to two different
352 // coordinate spaces. Replacements from 'First' refer to the original text
353 // whereas replacements from 'Second' refer to the text after applying 'First'.
355 // MergedReplacement always operates in the coordinate space of the original
356 // text, i.e. transforms elements from 'Second' to take into account what was
357 // changed based on the elements from 'First'.
359 // We can correctly calculate this projection as we look at the replacements in
360 // order of strictly increasing offsets.
363 // * We always merge elements from 'First' into elements from 'Second' and vice
364 // versa. Within each set, the replacements are non-overlapping.
365 // * We only extend to the right, i.e. merge elements with strictly increasing
367 class MergedReplacement {
369 MergedReplacement(const Replacement &R, bool MergeSecond, int D)
370 : MergeSecond(MergeSecond), Delta(D), FilePath(R.getFilePath()),
371 Offset(R.getOffset() + (MergeSecond ? 0 : Delta)), Length(R.getLength()),
372 Text(R.getReplacementText()) {
373 Delta += MergeSecond ? 0 : Text.size() - Length;
374 DeltaFirst = MergeSecond ? Text.size() - Length : 0;
377 // Merges the next element 'R' into this merged element. As we always merge
378 // from 'First' into 'Second' or vice versa, the MergedReplacement knows what
379 // set the next element is coming from.
380 void merge(const Replacement &R) {
382 unsigned REnd = R.getOffset() + Delta + R.getLength();
383 unsigned End = Offset + Text.size();
385 Length += REnd - End;
388 StringRef TextRef = Text;
389 StringRef Head = TextRef.substr(0, R.getOffset() + Delta - Offset);
390 StringRef Tail = TextRef.substr(REnd - Offset);
391 Text = (Head + R.getReplacementText() + Tail).str();
392 Delta += R.getReplacementText().size() - R.getLength();
394 unsigned End = Offset + Length;
395 StringRef RText = R.getReplacementText();
396 StringRef Tail = RText.substr(End - R.getOffset());
397 Text = (Text + Tail).str();
398 if (R.getOffset() + RText.size() > End) {
399 Length = R.getOffset() + R.getLength() - Offset;
402 Length += R.getLength() - RText.size();
404 DeltaFirst += RText.size() - R.getLength();
408 // Returns 'true' if 'R' starts strictly after the MergedReplacement and thus
409 // doesn't need to be merged.
410 bool endsBefore(const Replacement &R) const {
412 return Offset + Text.size() < R.getOffset() + Delta;
413 return Offset + Length < R.getOffset();
416 // Returns 'true' if an element from the second set should be merged next.
417 bool mergeSecond() const { return MergeSecond; }
419 int deltaFirst() const { return DeltaFirst; }
420 Replacement asReplacement() const { return {FilePath, Offset, Length, Text}; }
425 // Amount of characters that elements from 'Second' need to be shifted by in
426 // order to refer to the original text.
429 // Sum of all deltas (text-length - length) of elements from 'First' merged
430 // into this element. This is used to update 'Delta' once the
431 // MergedReplacement is completed.
434 // Data of the actually merged replacement. FilePath and Offset aren't changed
435 // as the element is only extended to the right.
436 const StringRef FilePath;
437 const unsigned Offset;
444 Replacements Replacements::merge(const Replacements &ReplacesToMerge) const {
445 if (empty() || ReplacesToMerge.empty())
446 return empty() ? ReplacesToMerge : *this;
448 auto &First = Replaces;
449 auto &Second = ReplacesToMerge.Replaces;
450 // Delta is the amount of characters that replacements from 'Second' need to
451 // be shifted so that their offsets refer to the original text.
453 ReplacementsImpl Result;
455 // Iterate over both sets and always add the next element (smallest total
456 // Offset) from either 'First' or 'Second'. Merge that element with
457 // subsequent replacements as long as they overlap. See more details in the
458 // comment on MergedReplacement.
459 for (auto FirstI = First.begin(), SecondI = Second.begin();
460 FirstI != First.end() || SecondI != Second.end();) {
461 bool NextIsFirst = SecondI == Second.end() ||
462 (FirstI != First.end() &&
463 FirstI->getOffset() < SecondI->getOffset() + Delta);
464 MergedReplacement Merged(NextIsFirst ? *FirstI : *SecondI, NextIsFirst,
466 ++(NextIsFirst ? FirstI : SecondI);
468 while ((Merged.mergeSecond() && SecondI != Second.end()) ||
469 (!Merged.mergeSecond() && FirstI != First.end())) {
470 auto &I = Merged.mergeSecond() ? SecondI : FirstI;
471 if (Merged.endsBefore(*I))
476 Delta -= Merged.deltaFirst();
477 Result.insert(Merged.asReplacement());
479 return Replacements(Result.begin(), Result.end());
482 // Combines overlapping ranges in \p Ranges and sorts the combined ranges.
483 // Returns a set of non-overlapping and sorted ranges that is equivalent to
485 static std::vector<Range> combineAndSortRanges(std::vector<Range> Ranges) {
486 llvm::sort(Ranges, [](const Range &LHS, const Range &RHS) {
487 if (LHS.getOffset() != RHS.getOffset())
488 return LHS.getOffset() < RHS.getOffset();
489 return LHS.getLength() < RHS.getLength();
491 std::vector<Range> Result;
492 for (const auto &R : Ranges) {
493 if (Result.empty() ||
494 Result.back().getOffset() + Result.back().getLength() < R.getOffset()) {
498 std::max(Result.back().getOffset() + Result.back().getLength(),
499 R.getOffset() + R.getLength());
500 Result[Result.size() - 1] =
501 Range(Result.back().getOffset(), NewEnd - Result.back().getOffset());
511 calculateRangesAfterReplacements(const Replacements &Replaces,
512 const std::vector<Range> &Ranges) {
513 // To calculate the new ranges,
514 // - Turn \p Ranges into Replacements at (offset, length) with an empty
515 // (unimportant) replacement text of length "length".
516 // - Merge with \p Replaces.
517 // - The new ranges will be the affected ranges of the merged replacements.
518 auto MergedRanges = combineAndSortRanges(Ranges);
519 if (Replaces.empty())
521 tooling::Replacements FakeReplaces;
522 for (const auto &R : MergedRanges) {
523 auto Err = FakeReplaces.add(Replacement(Replaces.begin()->getFilePath(),
524 R.getOffset(), R.getLength(),
525 std::string(R.getLength(), ' ')));
527 "Replacements must not conflict since ranges have been merged.");
528 llvm::consumeError(std::move(Err));
530 return FakeReplaces.merge(Replaces).getAffectedRanges();
533 } // namespace tooling
536 std::vector<Range> Replacements::getAffectedRanges() const {
537 std::vector<Range> ChangedRanges;
539 for (const auto &R : Replaces) {
540 unsigned Offset = R.getOffset() + Shift;
541 unsigned Length = R.getReplacementText().size();
542 Shift += Length - R.getLength();
543 ChangedRanges.push_back(Range(Offset, Length));
545 return combineAndSortRanges(ChangedRanges);
548 unsigned Replacements::getShiftedCodePosition(unsigned Position) const {
550 for (const auto &R : Replaces) {
551 if (R.getOffset() + R.getLength() <= Position) {
552 Offset += R.getReplacementText().size() - R.getLength();
555 if (R.getOffset() < Position &&
556 R.getOffset() + R.getReplacementText().size() <= Position) {
557 Position = R.getOffset() + R.getReplacementText().size();
558 if (!R.getReplacementText().empty())
563 return Position + Offset;
569 bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite) {
571 for (auto I = Replaces.rbegin(), E = Replaces.rend(); I != E; ++I) {
572 if (I->isApplicable()) {
573 Result = I->apply(Rewrite) && Result;
581 llvm::Expected<std::string> applyAllReplacements(StringRef Code,
582 const Replacements &Replaces) {
583 if (Replaces.empty())
586 IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
587 new llvm::vfs::InMemoryFileSystem);
588 FileManager Files(FileSystemOptions(), InMemoryFileSystem);
589 DiagnosticsEngine Diagnostics(
590 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
591 new DiagnosticOptions);
592 SourceManager SourceMgr(Diagnostics, Files);
593 Rewriter Rewrite(SourceMgr, LangOptions());
594 InMemoryFileSystem->addFile(
595 "<stdin>", 0, llvm::MemoryBuffer::getMemBuffer(Code, "<stdin>"));
596 FileID ID = SourceMgr.createFileID(Files.getFile("<stdin>"), SourceLocation(),
597 clang::SrcMgr::C_User);
598 for (auto I = Replaces.rbegin(), E = Replaces.rend(); I != E; ++I) {
599 Replacement Replace("<stdin>", I->getOffset(), I->getLength(),
600 I->getReplacementText());
601 if (!Replace.apply(Rewrite))
602 return llvm::make_error<ReplacementError>(
603 replacement_error::fail_to_apply, Replace);
606 llvm::raw_string_ostream OS(Result);
607 Rewrite.getEditBuffer(ID).write(OS);
612 std::map<std::string, Replacements> groupReplacementsByFile(
613 FileManager &FileMgr,
614 const std::map<std::string, Replacements> &FileToReplaces) {
615 std::map<std::string, Replacements> Result;
616 llvm::SmallPtrSet<const FileEntry *, 16> ProcessedFileEntries;
617 for (const auto &Entry : FileToReplaces) {
618 const FileEntry *FE = FileMgr.getFile(Entry.first);
620 llvm::errs() << "File path " << Entry.first << " is invalid.\n";
621 else if (ProcessedFileEntries.insert(FE).second)
622 Result[Entry.first] = std::move(Entry.second);
627 } // namespace tooling