1 //===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Run a sanity check on the IR to ensure that Safepoints - if they've been
11 // inserted - were inserted correctly. In particular, look for use of
12 // non-relocated values after a safepoint. It's primary use is to check the
13 // correctness of safepoint insertion immediately after insertion, but it can
14 // also be used to verify that later transforms have not found a way to break
15 // safepoint semenatics.
17 // In its current form, this verify checks a property which is sufficient, but
18 // not neccessary for correctness. There are some cases where an unrelocated
19 // pointer can be used after the safepoint. Consider this example:
23 // (a',b') = safepoint(a,b)
27 // Because it is valid to reorder 'c' above the safepoint, this is legal. In
28 // practice, this is a somewhat uncommon transform, but CodeGenPrep does create
29 // idioms like this. The verifier knows about these cases and avoids reporting
32 //===----------------------------------------------------------------------===//
34 #include "llvm/ADT/DenseSet.h"
35 #include "llvm/ADT/SetOperations.h"
36 #include "llvm/ADT/SetVector.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/Dominators.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/Intrinsics.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/Module.h"
44 #include "llvm/IR/Value.h"
45 #include "llvm/IR/SafepointIRVerifier.h"
46 #include "llvm/IR/Statepoint.h"
47 #include "llvm/Support/Debug.h"
48 #include "llvm/Support/CommandLine.h"
49 #include "llvm/Support/raw_ostream.h"
51 #define DEBUG_TYPE "safepoint-ir-verifier"
55 /// This option is used for writing test cases. Instead of crashing the program
56 /// when verification fails, report a message to the console (for FileCheck
57 /// usage) and continue execution as if nothing happened.
58 static cl::opt<bool> PrintOnly("safepoint-ir-verifier-print-only",
61 static void Verify(const Function &F, const DominatorTree &DT);
63 struct SafepointIRVerifier : public FunctionPass {
64 static char ID; // Pass identification, replacement for typeid
66 SafepointIRVerifier() : FunctionPass(ID) {
67 initializeSafepointIRVerifierPass(*PassRegistry::getPassRegistry());
70 bool runOnFunction(Function &F) override {
73 return false; // no modifications
76 void getAnalysisUsage(AnalysisUsage &AU) const override {
80 StringRef getPassName() const override { return "safepoint verifier"; }
83 void llvm::verifySafepointIR(Function &F) {
84 SafepointIRVerifier pass;
85 pass.runOnFunction(F);
88 char SafepointIRVerifier::ID = 0;
90 FunctionPass *llvm::createSafepointIRVerifierPass() {
91 return new SafepointIRVerifier();
94 INITIALIZE_PASS_BEGIN(SafepointIRVerifier, "verify-safepoint-ir",
95 "Safepoint IR Verifier", false, true)
96 INITIALIZE_PASS_END(SafepointIRVerifier, "verify-safepoint-ir",
97 "Safepoint IR Verifier", false, true)
99 static bool isGCPointerType(Type *T) {
100 if (auto *PT = dyn_cast<PointerType>(T))
101 // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
102 // GC managed heap. We know that a pointer into this heap needs to be
103 // updated and that no other pointer does.
104 return (1 == PT->getAddressSpace());
108 static bool containsGCPtrType(Type *Ty) {
109 if (isGCPointerType(Ty))
111 if (VectorType *VT = dyn_cast<VectorType>(Ty))
112 return isGCPointerType(VT->getScalarType());
113 if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
114 return containsGCPtrType(AT->getElementType());
115 if (StructType *ST = dyn_cast<StructType>(Ty))
116 return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
121 // Debugging aid -- prints a [Begin, End) range of values.
122 template<typename IteratorTy>
123 static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End) {
125 while (Begin != End) {
126 OS << **Begin << " ";
132 /// The verifier algorithm is phrased in terms of availability. The set of
133 /// values "available" at a given point in the control flow graph is the set of
134 /// correctly relocated value at that point, and is a subset of the set of
135 /// definitions dominating that point.
137 /// State we compute and track per basic block.
138 struct BasicBlockState {
139 // Set of values available coming in, before the phi nodes
140 DenseSet<const Value *> AvailableIn;
142 // Set of values available going out
143 DenseSet<const Value *> AvailableOut;
145 // AvailableOut minus AvailableIn.
146 // All elements are Instructions
147 DenseSet<const Value *> Contribution;
149 // True if this block contains a safepoint and thus AvailableIn does not
150 // contribute to AvailableOut.
151 bool Cleared = false;
155 /// Gather all the definitions dominating the start of BB into Result. This is
156 /// simply the Defs introduced by every dominating basic block and the function
158 static void GatherDominatingDefs(const BasicBlock *BB,
159 DenseSet<const Value *> &Result,
160 const DominatorTree &DT,
161 DenseMap<const BasicBlock *, BasicBlockState *> &BlockMap) {
162 DomTreeNode *DTN = DT[const_cast<BasicBlock *>(BB)];
164 while (DTN->getIDom()) {
165 DTN = DTN->getIDom();
166 const auto &Defs = BlockMap[DTN->getBlock()]->Contribution;
167 Result.insert(Defs.begin(), Defs.end());
168 // If this block is 'Cleared', then nothing LiveIn to this block can be
169 // available after this block completes. Note: This turns out to be
170 // really important for reducing memory consuption of the initial available
171 // sets and thus peak memory usage by this verifier.
172 if (BlockMap[DTN->getBlock()]->Cleared)
176 for (const Argument &A : BB->getParent()->args())
177 if (containsGCPtrType(A.getType()))
181 /// Model the effect of an instruction on the set of available values.
182 static void TransferInstruction(const Instruction &I, bool &Cleared,
183 DenseSet<const Value *> &Available) {
184 if (isStatepoint(I)) {
187 } else if (containsGCPtrType(I.getType()))
188 Available.insert(&I);
191 /// Compute the AvailableOut set for BB, based on the
192 /// BasicBlockState BBS, which is the BasicBlockState for BB. FirstPass is set
193 /// when the verifier runs for the first time computing the AvailableOut set
195 static void TransferBlock(const BasicBlock *BB,
196 BasicBlockState &BBS, bool FirstPass) {
198 const DenseSet<const Value *> &AvailableIn = BBS.AvailableIn;
199 DenseSet<const Value *> &AvailableOut = BBS.AvailableOut;
202 // AvailableOut does not change no matter how the input changes, just
203 // leave it be. We need to force this calculation the first time so that
204 // we have a AvailableOut at all.
206 AvailableOut = BBS.Contribution;
209 // Otherwise, we need to reduce the AvailableOut set by things which are no
210 // longer in our AvailableIn
211 DenseSet<const Value *> Temp = BBS.Contribution;
212 set_union(Temp, AvailableIn);
213 AvailableOut = std::move(Temp);
216 DEBUG(dbgs() << "Transfered block " << BB->getName() << " from ";
217 PrintValueSet(dbgs(), AvailableIn.begin(), AvailableIn.end());
219 PrintValueSet(dbgs(), AvailableOut.begin(), AvailableOut.end());
223 /// A given derived pointer can have multiple base pointers through phi/selects.
224 /// This type indicates when the base pointer is exclusively constant
225 /// (ExclusivelySomeConstant), and if that constant is proven to be exclusively
226 /// null, we record that as ExclusivelyNull. In all other cases, the BaseType is
229 NonConstant = 1, // Base pointers is not exclusively constant.
231 ExclusivelySomeConstant // Base pointers for a given derived pointer is from a
232 // set of constants, but they are not exclusively
236 /// Return the baseType for Val which states whether Val is exclusively
237 /// derived from constant/null, or not exclusively derived from constant.
238 /// Val is exclusively derived off a constant base when all operands of phi and
239 /// selects are derived off a constant base.
240 static enum BaseType getBaseType(const Value *Val) {
242 SmallVector<const Value *, 32> Worklist;
243 DenseSet<const Value *> Visited;
244 bool isExclusivelyDerivedFromNull = true;
245 Worklist.push_back(Val);
246 // Strip through all the bitcasts and geps to get base pointer. Also check for
247 // the exclusive value when there can be multiple base pointers (through phis
249 while(!Worklist.empty()) {
250 const Value *V = Worklist.pop_back_val();
251 if (!Visited.insert(V).second)
254 if (const auto *CI = dyn_cast<CastInst>(V)) {
255 Worklist.push_back(CI->stripPointerCasts());
258 if (const auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
259 Worklist.push_back(GEP->getPointerOperand());
262 // Push all the incoming values of phi node into the worklist for
264 if (const auto *PN = dyn_cast<PHINode>(V)) {
265 for (Value *InV: PN->incoming_values())
266 Worklist.push_back(InV);
269 if (const auto *SI = dyn_cast<SelectInst>(V)) {
270 // Push in the true and false values
271 Worklist.push_back(SI->getTrueValue());
272 Worklist.push_back(SI->getFalseValue());
275 if (isa<Constant>(V)) {
276 // We found at least one base pointer which is non-null, so this derived
277 // pointer is not exclusively derived from null.
278 if (V != Constant::getNullValue(V->getType()))
279 isExclusivelyDerivedFromNull = false;
280 // Continue processing the remaining values to make sure it's exclusively
284 // At this point, we know that the base pointer is not exclusively
286 return BaseType::NonConstant;
288 // Now, we know that the base pointer is exclusively constant, but we need to
289 // differentiate between exclusive null constant and non-null constant.
290 return isExclusivelyDerivedFromNull ? BaseType::ExclusivelyNull
291 : BaseType::ExclusivelySomeConstant;
294 static void Verify(const Function &F, const DominatorTree &DT) {
295 SpecificBumpPtrAllocator<BasicBlockState> BSAllocator;
296 DenseMap<const BasicBlock *, BasicBlockState *> BlockMap;
298 DEBUG(dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n");
300 dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n";
303 for (const BasicBlock &BB : F) {
304 BasicBlockState *BBS = new(BSAllocator.Allocate()) BasicBlockState;
305 for (const auto &I : BB)
306 TransferInstruction(I, BBS->Cleared, BBS->Contribution);
310 for (auto &BBI : BlockMap) {
311 GatherDominatingDefs(BBI.first, BBI.second->AvailableIn, DT, BlockMap);
312 TransferBlock(BBI.first, *BBI.second, true);
315 SetVector<const BasicBlock *> Worklist;
316 for (auto &BBI : BlockMap)
317 Worklist.insert(BBI.first);
319 // This loop iterates the AvailableIn and AvailableOut sets to a fixed point.
320 // The AvailableIn and AvailableOut sets decrease as we iterate.
321 while (!Worklist.empty()) {
322 const BasicBlock *BB = Worklist.pop_back_val();
323 BasicBlockState *BBS = BlockMap[BB];
325 size_t OldInCount = BBS->AvailableIn.size();
326 for (const BasicBlock *PBB : predecessors(BB))
327 set_intersect(BBS->AvailableIn, BlockMap[PBB]->AvailableOut);
329 if (OldInCount == BBS->AvailableIn.size())
332 assert(OldInCount > BBS->AvailableIn.size() && "invariant!");
334 size_t OldOutCount = BBS->AvailableOut.size();
335 TransferBlock(BB, *BBS, false);
336 if (OldOutCount != BBS->AvailableOut.size()) {
337 assert(OldOutCount > BBS->AvailableOut.size() && "invariant!");
338 Worklist.insert(succ_begin(BB), succ_end(BB));
342 // We now have all the information we need to decide if the use of a heap
343 // reference is legal or not, given our safepoint semantics.
345 bool AnyInvalidUses = false;
347 auto ReportInvalidUse = [&AnyInvalidUses](const Value &V,
348 const Instruction &I) {
349 errs() << "Illegal use of unrelocated value found!\n";
350 errs() << "Def: " << V << "\n";
351 errs() << "Use: " << I << "\n";
354 AnyInvalidUses = true;
357 auto isNotExclusivelyConstantDerived = [](const Value *V) {
358 return getBaseType(V) == BaseType::NonConstant;
361 for (const BasicBlock &BB : F) {
362 // We destructively modify AvailableIn as we traverse the block instruction
364 DenseSet<const Value *> &AvailableSet = BlockMap[&BB]->AvailableIn;
365 for (const Instruction &I : BB) {
366 if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
367 if (containsGCPtrType(PN->getType()))
368 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
369 const BasicBlock *InBB = PN->getIncomingBlock(i);
370 const Value *InValue = PN->getIncomingValue(i);
372 if (isNotExclusivelyConstantDerived(InValue) &&
373 !BlockMap[InBB]->AvailableOut.count(InValue))
374 ReportInvalidUse(*InValue, *PN);
376 } else if (isa<CmpInst>(I) &&
377 containsGCPtrType(I.getOperand(0)->getType())) {
378 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
379 enum BaseType baseTyLHS = getBaseType(LHS),
380 baseTyRHS = getBaseType(RHS);
382 // Returns true if LHS and RHS are unrelocated pointers and they are
383 // valid unrelocated uses.
384 auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS, &RHS] () {
385 // A cmp instruction has valid unrelocated pointer operands only if
386 // both operands are unrelocated pointers.
387 // In the comparison between two pointers, if one is an unrelocated
388 // use, the other *should be* an unrelocated use, for this
389 // instruction to contain valid unrelocated uses. This unrelocated
390 // use can be a null constant as well, or another unrelocated
392 if (AvailableSet.count(LHS) || AvailableSet.count(RHS))
394 // Constant pointers (that are not exclusively null) may have
395 // meaning in different VMs, so we cannot reorder the compare
396 // against constant pointers before the safepoint. In other words,
397 // comparison of an unrelocated use against a non-null constant
399 if ((baseTyLHS == BaseType::ExclusivelySomeConstant &&
400 baseTyRHS == BaseType::NonConstant) ||
401 (baseTyLHS == BaseType::NonConstant &&
402 baseTyRHS == BaseType::ExclusivelySomeConstant))
404 // All other cases are valid cases enumerated below:
405 // 1. Comparison between an exlusively derived null pointer and a
406 // constant base pointer.
407 // 2. Comparison between an exlusively derived null pointer and a
408 // non-constant unrelocated base pointer.
409 // 3. Comparison between 2 unrelocated pointers.
412 if (!hasValidUnrelocatedUse()) {
413 // Print out all non-constant derived pointers that are unrelocated
414 // uses, which are invalid.
415 if (baseTyLHS == BaseType::NonConstant && !AvailableSet.count(LHS))
416 ReportInvalidUse(*LHS, I);
417 if (baseTyRHS == BaseType::NonConstant && !AvailableSet.count(RHS))
418 ReportInvalidUse(*RHS, I);
421 for (const Value *V : I.operands())
422 if (containsGCPtrType(V->getType()) &&
423 isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V))
424 ReportInvalidUse(*V, I);
427 bool Cleared = false;
428 TransferInstruction(I, Cleared, AvailableSet);
433 if (PrintOnly && !AnyInvalidUses) {
434 dbgs() << "No illegal uses found by SafepointIRVerifier in: " << F.getName()