1 //======- CFLGraph.h - Abstract stratified sets implementation. --------======//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// This file defines CFLGraph, an auxiliary data structure used by CFL-based
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_ANALYSIS_CFLGRAPH_H
16 #define LLVM_ANALYSIS_CFLGRAPH_H
18 #include "AliasAnalysisSummary.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/Analysis/MemoryBuiltins.h"
21 #include "llvm/IR/InstVisitor.h"
22 #include "llvm/IR/Instructions.h"
27 /// \brief The Program Expression Graph (PEG) of CFL analysis
28 /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
29 /// describe flow-insensitive pointer-related behaviors. Given an LLVM function,
30 /// the main purpose of this graph is to abstract away unrelated facts and
31 /// translate the rest into a form that can be easily digested by CFL analyses.
32 /// Each Node in the graph is an InstantiatedValue, and each edge represent a
33 /// pointer assignment between InstantiatedValue. Pointer
34 /// references/dereferences are not explicitly stored in the graph: we
35 /// implicitly assume that for each node (X, I) it has a dereference edge to (X,
36 /// I+1) and a reference edge to (X, I-1).
39 typedef InstantiatedValue Node;
46 typedef std::vector<Edge> EdgeList;
49 EdgeList Edges, ReverseEdges;
54 std::vector<NodeInfo> Levels;
57 bool addNodeToLevel(unsigned Level) {
58 auto NumLevels = Levels.size();
59 if (NumLevels > Level)
61 Levels.resize(Level + 1);
65 NodeInfo &getNodeInfoAtLevel(unsigned Level) {
66 assert(Level < Levels.size());
69 const NodeInfo &getNodeInfoAtLevel(unsigned Level) const {
70 assert(Level < Levels.size());
74 unsigned getNumLevels() const { return Levels.size(); }
78 typedef DenseMap<Value *, ValueInfo> ValueMap;
81 NodeInfo *getNode(Node N) {
82 auto Itr = ValueImpls.find(N.Val);
83 if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
85 return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
89 typedef ValueMap::const_iterator const_value_iterator;
91 bool addNode(Node N, AliasAttrs Attr = AliasAttrs()) {
92 assert(N.Val != nullptr);
93 auto &ValInfo = ValueImpls[N.Val];
94 auto Changed = ValInfo.addNodeToLevel(N.DerefLevel);
95 ValInfo.getNodeInfoAtLevel(N.DerefLevel).Attr |= Attr;
99 void addAttr(Node N, AliasAttrs Attr) {
100 auto *Info = getNode(N);
101 assert(Info != nullptr);
105 void addEdge(Node From, Node To, int64_t Offset = 0) {
106 auto *FromInfo = getNode(From);
107 assert(FromInfo != nullptr);
108 auto *ToInfo = getNode(To);
109 assert(ToInfo != nullptr);
111 FromInfo->Edges.push_back(Edge{To, Offset});
112 ToInfo->ReverseEdges.push_back(Edge{From, Offset});
115 const NodeInfo *getNode(Node N) const {
116 auto Itr = ValueImpls.find(N.Val);
117 if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
119 return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
122 AliasAttrs attrFor(Node N) const {
123 auto *Info = getNode(N);
124 assert(Info != nullptr);
128 iterator_range<const_value_iterator> value_mappings() const {
129 return make_range<const_value_iterator>(ValueImpls.begin(),
134 ///\brief A builder class used to create CFLGraph instance from a given function
135 /// The CFL-AA that uses this builder must provide its own type as a template
136 /// argument. This is necessary for interprocedural processing: CFLGraphBuilder
137 /// needs a way of obtaining the summary of other functions when callinsts are
139 /// As a result, we expect the said CFL-AA to expose a getAliasSummary() public
140 /// member function that takes a Function& and returns the corresponding summary
141 /// as a const AliasSummary*.
142 template <typename CFLAA> class CFLGraphBuilder {
143 // Input of the builder
145 const TargetLibraryInfo &TLI;
147 // Output of the builder
149 SmallVector<Value *, 4> ReturnedValues;
152 /// Gets the edges our graph should have, based on an Instruction*
153 class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
155 const DataLayout &DL;
156 const TargetLibraryInfo &TLI;
159 SmallVectorImpl<Value *> &ReturnValues;
161 static bool hasUsefulEdges(ConstantExpr *CE) {
162 // ConstantExpr doesn't have terminators, invokes, or fences, so only
164 // to check for compares.
165 return CE->getOpcode() != Instruction::ICmp &&
166 CE->getOpcode() != Instruction::FCmp;
169 // Returns possible functions called by CS into the given SmallVectorImpl.
170 // Returns true if targets found, false otherwise.
171 static bool getPossibleTargets(CallSite CS,
172 SmallVectorImpl<Function *> &Output) {
173 if (auto *Fn = CS.getCalledFunction()) {
174 Output.push_back(Fn);
178 // TODO: If the call is indirect, we might be able to enumerate all
180 // targets of the call and return them, rather than just failing.
184 void addNode(Value *Val, AliasAttrs Attr = AliasAttrs()) {
185 assert(Val != nullptr && Val->getType()->isPointerTy());
186 if (auto GVal = dyn_cast<GlobalValue>(Val)) {
187 if (Graph.addNode(InstantiatedValue{GVal, 0},
188 getGlobalOrArgAttrFromValue(*GVal)))
189 Graph.addNode(InstantiatedValue{GVal, 1}, getAttrUnknown());
190 } else if (auto CExpr = dyn_cast<ConstantExpr>(Val)) {
191 if (hasUsefulEdges(CExpr)) {
192 if (Graph.addNode(InstantiatedValue{CExpr, 0}))
193 visitConstantExpr(CExpr);
196 Graph.addNode(InstantiatedValue{Val, 0}, Attr);
199 void addAssignEdge(Value *From, Value *To, int64_t Offset = 0) {
200 assert(From != nullptr && To != nullptr);
201 if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
206 Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 0},
211 void addDerefEdge(Value *From, Value *To, bool IsRead) {
212 assert(From != nullptr && To != nullptr);
213 if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
218 Graph.addNode(InstantiatedValue{From, 1});
219 Graph.addEdge(InstantiatedValue{From, 1}, InstantiatedValue{To, 0});
221 Graph.addNode(InstantiatedValue{To, 1});
222 Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 1});
226 void addLoadEdge(Value *From, Value *To) { addDerefEdge(From, To, true); }
227 void addStoreEdge(Value *From, Value *To) { addDerefEdge(From, To, false); }
230 GetEdgesVisitor(CFLGraphBuilder &Builder, const DataLayout &DL)
231 : AA(Builder.Analysis), DL(DL), TLI(Builder.TLI), Graph(Builder.Graph),
232 ReturnValues(Builder.ReturnedValues) {}
234 void visitInstruction(Instruction &) {
235 llvm_unreachable("Unsupported instruction encountered");
238 void visitReturnInst(ReturnInst &Inst) {
239 if (auto RetVal = Inst.getReturnValue()) {
240 if (RetVal->getType()->isPointerTy()) {
242 ReturnValues.push_back(RetVal);
247 void visitPtrToIntInst(PtrToIntInst &Inst) {
248 auto *Ptr = Inst.getOperand(0);
249 addNode(Ptr, getAttrEscaped());
252 void visitIntToPtrInst(IntToPtrInst &Inst) {
254 addNode(Ptr, getAttrUnknown());
257 void visitCastInst(CastInst &Inst) {
258 auto *Src = Inst.getOperand(0);
259 addAssignEdge(Src, &Inst);
262 void visitBinaryOperator(BinaryOperator &Inst) {
263 auto *Op1 = Inst.getOperand(0);
264 auto *Op2 = Inst.getOperand(1);
265 addAssignEdge(Op1, &Inst);
266 addAssignEdge(Op2, &Inst);
269 void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
270 auto *Ptr = Inst.getPointerOperand();
271 auto *Val = Inst.getNewValOperand();
272 addStoreEdge(Val, Ptr);
275 void visitAtomicRMWInst(AtomicRMWInst &Inst) {
276 auto *Ptr = Inst.getPointerOperand();
277 auto *Val = Inst.getValOperand();
278 addStoreEdge(Val, Ptr);
281 void visitPHINode(PHINode &Inst) {
282 for (Value *Val : Inst.incoming_values())
283 addAssignEdge(Val, &Inst);
286 void visitGEP(GEPOperator &GEPOp) {
287 uint64_t Offset = UnknownOffset;
288 APInt APOffset(DL.getPointerSizeInBits(GEPOp.getPointerAddressSpace()),
290 if (GEPOp.accumulateConstantOffset(DL, APOffset))
291 Offset = APOffset.getSExtValue();
293 auto *Op = GEPOp.getPointerOperand();
294 addAssignEdge(Op, &GEPOp, Offset);
297 void visitGetElementPtrInst(GetElementPtrInst &Inst) {
298 auto *GEPOp = cast<GEPOperator>(&Inst);
302 void visitSelectInst(SelectInst &Inst) {
303 // Condition is not processed here (The actual statement producing
304 // the condition result is processed elsewhere). For select, the
305 // condition is evaluated, but not loaded, stored, or assigned
306 // simply as a result of being the condition of a select.
308 auto *TrueVal = Inst.getTrueValue();
309 auto *FalseVal = Inst.getFalseValue();
310 addAssignEdge(TrueVal, &Inst);
311 addAssignEdge(FalseVal, &Inst);
314 void visitAllocaInst(AllocaInst &Inst) { addNode(&Inst); }
316 void visitLoadInst(LoadInst &Inst) {
317 auto *Ptr = Inst.getPointerOperand();
319 addLoadEdge(Ptr, Val);
322 void visitStoreInst(StoreInst &Inst) {
323 auto *Ptr = Inst.getPointerOperand();
324 auto *Val = Inst.getValueOperand();
325 addStoreEdge(Val, Ptr);
328 void visitVAArgInst(VAArgInst &Inst) {
329 // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it
332 // 1. Loads a value from *((T*)*Ptr).
333 // 2. Increments (stores to) *Ptr by some target-specific amount.
334 // For now, we'll handle this like a landingpad instruction (by placing
336 // result in its own group, and having that group alias externals).
337 if (Inst.getType()->isPointerTy())
338 addNode(&Inst, getAttrUnknown());
341 static bool isFunctionExternal(Function *Fn) {
342 return !Fn->hasExactDefinition();
345 bool tryInterproceduralAnalysis(CallSite CS,
346 const SmallVectorImpl<Function *> &Fns) {
347 assert(Fns.size() > 0);
349 if (CS.arg_size() > MaxSupportedArgsInSummary)
352 // Exit early if we'll fail anyway
353 for (auto *Fn : Fns) {
354 if (isFunctionExternal(Fn) || Fn->isVarArg())
356 // Fail if the caller does not provide enough arguments
357 assert(Fn->arg_size() <= CS.arg_size());
358 if (!AA.getAliasSummary(*Fn))
362 for (auto *Fn : Fns) {
363 auto Summary = AA.getAliasSummary(*Fn);
364 assert(Summary != nullptr);
366 auto &RetParamRelations = Summary->RetParamRelations;
367 for (auto &Relation : RetParamRelations) {
368 auto IRelation = instantiateExternalRelation(Relation, CS);
369 if (IRelation.hasValue()) {
370 Graph.addNode(IRelation->From);
371 Graph.addNode(IRelation->To);
372 Graph.addEdge(IRelation->From, IRelation->To);
376 auto &RetParamAttributes = Summary->RetParamAttributes;
377 for (auto &Attribute : RetParamAttributes) {
378 auto IAttr = instantiateExternalAttribute(Attribute, CS);
379 if (IAttr.hasValue())
380 Graph.addNode(IAttr->IValue, IAttr->Attr);
387 void visitCallSite(CallSite CS) {
388 auto Inst = CS.getInstruction();
390 // Make sure all arguments and return value are added to the graph first
391 for (Value *V : CS.args())
392 if (V->getType()->isPointerTy())
394 if (Inst->getType()->isPointerTy())
397 // Check if Inst is a call to a library function that
398 // allocates/deallocates
399 // on the heap. Those kinds of functions do not introduce any aliases.
400 // TODO: address other common library functions such as realloc(),
403 if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) ||
404 isFreeCall(Inst, &TLI))
407 // TODO: Add support for noalias args/all the other fun function
409 // that we can tack on.
410 SmallVector<Function *, 4> Targets;
411 if (getPossibleTargets(CS, Targets))
412 if (tryInterproceduralAnalysis(CS, Targets))
415 // Because the function is opaque, we need to note that anything
416 // could have happened to the arguments (unless the function is marked
417 // readonly or readnone), and that the result could alias just about
418 // anything, too (unless the result is marked noalias).
419 if (!CS.onlyReadsMemory())
420 for (Value *V : CS.args()) {
421 if (V->getType()->isPointerTy()) {
422 // The argument itself escapes.
423 Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped());
424 // The fate of argument memory is unknown. Note that since
425 // AliasAttrs is transitive with respect to dereference, we only
426 // need to specify it for the first-level memory.
427 Graph.addNode(InstantiatedValue{V, 1}, getAttrUnknown());
431 if (Inst->getType()->isPointerTy()) {
432 auto *Fn = CS.getCalledFunction();
433 if (Fn == nullptr || !Fn->doesNotAlias(0))
434 // No need to call addNode() since we've added Inst at the
435 // beginning of this function and we know it is not a global.
436 Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown());
440 /// Because vectors/aggregates are immutable and unaddressable, there's
441 /// nothing we can do to coax a value out of them, other than calling
442 /// Extract{Element,Value}. We can effectively treat them as pointers to
443 /// arbitrary memory locations we can store in and load from.
444 void visitExtractElementInst(ExtractElementInst &Inst) {
445 auto *Ptr = Inst.getVectorOperand();
447 addLoadEdge(Ptr, Val);
450 void visitInsertElementInst(InsertElementInst &Inst) {
451 auto *Vec = Inst.getOperand(0);
452 auto *Val = Inst.getOperand(1);
453 addAssignEdge(Vec, &Inst);
454 addStoreEdge(Val, &Inst);
457 void visitLandingPadInst(LandingPadInst &Inst) {
458 // Exceptions come from "nowhere", from our analysis' perspective.
459 // So we place the instruction its own group, noting that said group may
461 if (Inst.getType()->isPointerTy())
462 addNode(&Inst, getAttrUnknown());
465 void visitInsertValueInst(InsertValueInst &Inst) {
466 auto *Agg = Inst.getOperand(0);
467 auto *Val = Inst.getOperand(1);
468 addAssignEdge(Agg, &Inst);
469 addStoreEdge(Val, &Inst);
472 void visitExtractValueInst(ExtractValueInst &Inst) {
473 auto *Ptr = Inst.getAggregateOperand();
474 addLoadEdge(Ptr, &Inst);
477 void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
478 auto *From1 = Inst.getOperand(0);
479 auto *From2 = Inst.getOperand(1);
480 addAssignEdge(From1, &Inst);
481 addAssignEdge(From2, &Inst);
484 void visitConstantExpr(ConstantExpr *CE) {
485 switch (CE->getOpcode()) {
486 case Instruction::GetElementPtr: {
487 auto GEPOp = cast<GEPOperator>(CE);
491 case Instruction::PtrToInt: {
492 auto *Ptr = CE->getOperand(0);
493 addNode(Ptr, getAttrEscaped());
496 case Instruction::IntToPtr: {
497 addNode(CE, getAttrUnknown());
500 case Instruction::BitCast:
501 case Instruction::AddrSpaceCast:
502 case Instruction::Trunc:
503 case Instruction::ZExt:
504 case Instruction::SExt:
505 case Instruction::FPExt:
506 case Instruction::FPTrunc:
507 case Instruction::UIToFP:
508 case Instruction::SIToFP:
509 case Instruction::FPToUI:
510 case Instruction::FPToSI: {
511 auto *Src = CE->getOperand(0);
512 addAssignEdge(Src, CE);
515 case Instruction::Select: {
516 auto *TrueVal = CE->getOperand(0);
517 auto *FalseVal = CE->getOperand(1);
518 addAssignEdge(TrueVal, CE);
519 addAssignEdge(FalseVal, CE);
522 case Instruction::InsertElement: {
523 auto *Vec = CE->getOperand(0);
524 auto *Val = CE->getOperand(1);
525 addAssignEdge(Vec, CE);
526 addStoreEdge(Val, CE);
529 case Instruction::ExtractElement: {
530 auto *Ptr = CE->getOperand(0);
531 addLoadEdge(Ptr, CE);
534 case Instruction::InsertValue: {
535 auto *Agg = CE->getOperand(0);
536 auto *Val = CE->getOperand(1);
537 addAssignEdge(Agg, CE);
538 addStoreEdge(Val, CE);
541 case Instruction::ExtractValue: {
542 auto *Ptr = CE->getOperand(0);
543 addLoadEdge(Ptr, CE);
545 case Instruction::ShuffleVector: {
546 auto *From1 = CE->getOperand(0);
547 auto *From2 = CE->getOperand(1);
548 addAssignEdge(From1, CE);
549 addAssignEdge(From2, CE);
552 case Instruction::Add:
553 case Instruction::Sub:
554 case Instruction::FSub:
555 case Instruction::Mul:
556 case Instruction::FMul:
557 case Instruction::UDiv:
558 case Instruction::SDiv:
559 case Instruction::FDiv:
560 case Instruction::URem:
561 case Instruction::SRem:
562 case Instruction::FRem:
563 case Instruction::And:
564 case Instruction::Or:
565 case Instruction::Xor:
566 case Instruction::Shl:
567 case Instruction::LShr:
568 case Instruction::AShr:
569 case Instruction::ICmp:
570 case Instruction::FCmp: {
571 addAssignEdge(CE->getOperand(0), CE);
572 addAssignEdge(CE->getOperand(1), CE);
576 llvm_unreachable("Unknown instruction type encountered!");
583 // Determines whether or not we an instruction is useless to us (e.g.
585 static bool hasUsefulEdges(Instruction *Inst) {
586 bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) &&
587 !isa<InvokeInst>(Inst) &&
588 !isa<ReturnInst>(Inst);
589 return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) &&
590 !IsNonInvokeRetTerminator;
593 void addArgumentToGraph(Argument &Arg) {
594 if (Arg.getType()->isPointerTy()) {
595 Graph.addNode(InstantiatedValue{&Arg, 0},
596 getGlobalOrArgAttrFromValue(Arg));
597 // Pointees of a formal parameter is known to the caller
598 Graph.addNode(InstantiatedValue{&Arg, 1}, getAttrCaller());
602 // Given an Instruction, this will add it to the graph, along with any
603 // Instructions that are potentially only available from said Instruction
604 // For example, given the following line:
605 // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
606 // addInstructionToGraph would add both the `load` and `getelementptr`
607 // instructions to the graph appropriately.
608 void addInstructionToGraph(GetEdgesVisitor &Visitor, Instruction &Inst) {
609 if (!hasUsefulEdges(&Inst))
615 // Builds the graph needed for constructing the StratifiedSets for the given
617 void buildGraphFrom(Function &Fn) {
618 GetEdgesVisitor Visitor(*this, Fn.getParent()->getDataLayout());
620 for (auto &Bb : Fn.getBasicBlockList())
621 for (auto &Inst : Bb.getInstList())
622 addInstructionToGraph(Visitor, Inst);
624 for (auto &Arg : Fn.args())
625 addArgumentToGraph(Arg);
629 CFLGraphBuilder(CFLAA &Analysis, const TargetLibraryInfo &TLI, Function &Fn)
630 : Analysis(Analysis), TLI(TLI) {
634 const CFLGraph &getCFLGraph() const { return Graph; }
635 const SmallVector<Value *, 4> &getReturnValues() const {
636 return ReturnedValues;