1 //======- CFLGraph.h - Abstract stratified sets implementation. --------======//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// This file defines CFLGraph, an auxiliary data structure used by CFL-based
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_ANALYSIS_CFLGRAPH_H
16 #define LLVM_ANALYSIS_CFLGRAPH_H
18 #include "AliasAnalysisSummary.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/Analysis/MemoryBuiltins.h"
21 #include "llvm/IR/InstVisitor.h"
22 #include "llvm/IR/Instructions.h"
27 /// \brief The Program Expression Graph (PEG) of CFL analysis
28 /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
29 /// describe flow-insensitive pointer-related behaviors. Given an LLVM function,
30 /// the main purpose of this graph is to abstract away unrelated facts and
31 /// translate the rest into a form that can be easily digested by CFL analyses.
32 /// Each Node in the graph is an InstantiatedValue, and each edge represent a
33 /// pointer assignment between InstantiatedValue. Pointer
34 /// references/dereferences are not explicitly stored in the graph: we
35 /// implicitly assume that for each node (X, I) it has a dereference edge to (X,
36 /// I+1) and a reference edge to (X, I-1).
39 typedef InstantiatedValue Node;
45 typedef std::vector<Edge> EdgeList;
48 EdgeList Edges, ReverseEdges;
53 std::vector<NodeInfo> Levels;
56 bool addNodeToLevel(unsigned Level) {
57 auto NumLevels = Levels.size();
58 if (NumLevels > Level)
60 Levels.resize(Level + 1);
64 NodeInfo &getNodeInfoAtLevel(unsigned Level) {
65 assert(Level < Levels.size());
68 const NodeInfo &getNodeInfoAtLevel(unsigned Level) const {
69 assert(Level < Levels.size());
73 unsigned getNumLevels() const { return Levels.size(); }
77 typedef DenseMap<Value *, ValueInfo> ValueMap;
80 NodeInfo *getNode(Node N) {
81 auto Itr = ValueImpls.find(N.Val);
82 if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
84 return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
88 typedef ValueMap::const_iterator const_value_iterator;
90 bool addNode(Node N, AliasAttrs Attr = AliasAttrs()) {
91 assert(N.Val != nullptr);
92 auto &ValInfo = ValueImpls[N.Val];
93 auto Changed = ValInfo.addNodeToLevel(N.DerefLevel);
94 ValInfo.getNodeInfoAtLevel(N.DerefLevel).Attr |= Attr;
98 void addAttr(Node N, AliasAttrs Attr) {
99 auto *Info = getNode(N);
100 assert(Info != nullptr);
104 void addEdge(Node From, Node To, int64_t Offset = 0) {
105 auto *FromInfo = getNode(From);
106 assert(FromInfo != nullptr);
107 auto *ToInfo = getNode(To);
108 assert(ToInfo != nullptr);
110 FromInfo->Edges.push_back(Edge{To});
111 ToInfo->ReverseEdges.push_back(Edge{From});
114 const NodeInfo *getNode(Node N) const {
115 auto Itr = ValueImpls.find(N.Val);
116 if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
118 return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
121 AliasAttrs attrFor(Node N) const {
122 auto *Info = getNode(N);
123 assert(Info != nullptr);
127 iterator_range<const_value_iterator> value_mappings() const {
128 return make_range<const_value_iterator>(ValueImpls.begin(),
133 ///\brief A builder class used to create CFLGraph instance from a given function
134 /// The CFL-AA that uses this builder must provide its own type as a template
135 /// argument. This is necessary for interprocedural processing: CFLGraphBuilder
136 /// needs a way of obtaining the summary of other functions when callinsts are
138 /// As a result, we expect the said CFL-AA to expose a getAliasSummary() public
139 /// member function that takes a Function& and returns the corresponding summary
140 /// as a const AliasSummary*.
141 template <typename CFLAA> class CFLGraphBuilder {
142 // Input of the builder
144 const TargetLibraryInfo &TLI;
146 // Output of the builder
148 SmallVector<Value *, 4> ReturnedValues;
151 /// Gets the edges our graph should have, based on an Instruction*
152 class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
154 const TargetLibraryInfo &TLI;
157 SmallVectorImpl<Value *> &ReturnValues;
159 static bool hasUsefulEdges(ConstantExpr *CE) {
160 // ConstantExpr doesn't have terminators, invokes, or fences, so only
162 // to check for compares.
163 return CE->getOpcode() != Instruction::ICmp &&
164 CE->getOpcode() != Instruction::FCmp;
167 // Returns possible functions called by CS into the given SmallVectorImpl.
168 // Returns true if targets found, false otherwise.
169 static bool getPossibleTargets(CallSite CS,
170 SmallVectorImpl<Function *> &Output) {
171 if (auto *Fn = CS.getCalledFunction()) {
172 Output.push_back(Fn);
176 // TODO: If the call is indirect, we might be able to enumerate all
178 // targets of the call and return them, rather than just failing.
182 void addNode(Value *Val, AliasAttrs Attr = AliasAttrs()) {
183 assert(Val != nullptr && Val->getType()->isPointerTy());
184 if (auto GVal = dyn_cast<GlobalValue>(Val)) {
185 if (Graph.addNode(InstantiatedValue{GVal, 0},
186 getGlobalOrArgAttrFromValue(*GVal)))
187 Graph.addNode(InstantiatedValue{GVal, 1}, getAttrUnknown());
188 } else if (auto CExpr = dyn_cast<ConstantExpr>(Val)) {
189 if (hasUsefulEdges(CExpr)) {
190 if (Graph.addNode(InstantiatedValue{CExpr, 0}))
191 visitConstantExpr(CExpr);
194 Graph.addNode(InstantiatedValue{Val, 0}, Attr);
197 void addAssignEdge(Value *From, Value *To, int64_t Offset = 0) {
198 assert(From != nullptr && To != nullptr);
199 if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
204 Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 0},
209 void addDerefEdge(Value *From, Value *To, bool IsRead) {
210 assert(From != nullptr && To != nullptr);
211 if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
216 Graph.addNode(InstantiatedValue{From, 1});
217 Graph.addEdge(InstantiatedValue{From, 1}, InstantiatedValue{To, 0});
219 Graph.addNode(InstantiatedValue{To, 1});
220 Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 1});
224 void addLoadEdge(Value *From, Value *To) { addDerefEdge(From, To, true); }
225 void addStoreEdge(Value *From, Value *To) { addDerefEdge(From, To, false); }
228 GetEdgesVisitor(CFLGraphBuilder &Builder)
229 : AA(Builder.Analysis), TLI(Builder.TLI), Graph(Builder.Graph),
230 ReturnValues(Builder.ReturnedValues) {}
232 void visitInstruction(Instruction &) {
233 llvm_unreachable("Unsupported instruction encountered");
236 void visitReturnInst(ReturnInst &Inst) {
237 if (auto RetVal = Inst.getReturnValue()) {
238 if (RetVal->getType()->isPointerTy()) {
240 ReturnValues.push_back(RetVal);
245 void visitPtrToIntInst(PtrToIntInst &Inst) {
246 auto *Ptr = Inst.getOperand(0);
247 addNode(Ptr, getAttrEscaped());
250 void visitIntToPtrInst(IntToPtrInst &Inst) {
252 addNode(Ptr, getAttrUnknown());
255 void visitCastInst(CastInst &Inst) {
256 auto *Src = Inst.getOperand(0);
257 addAssignEdge(Src, &Inst);
260 void visitBinaryOperator(BinaryOperator &Inst) {
261 auto *Op1 = Inst.getOperand(0);
262 auto *Op2 = Inst.getOperand(1);
263 addAssignEdge(Op1, &Inst);
264 addAssignEdge(Op2, &Inst);
267 void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
268 auto *Ptr = Inst.getPointerOperand();
269 auto *Val = Inst.getNewValOperand();
270 addStoreEdge(Val, Ptr);
273 void visitAtomicRMWInst(AtomicRMWInst &Inst) {
274 auto *Ptr = Inst.getPointerOperand();
275 auto *Val = Inst.getValOperand();
276 addStoreEdge(Val, Ptr);
279 void visitPHINode(PHINode &Inst) {
280 for (Value *Val : Inst.incoming_values())
281 addAssignEdge(Val, &Inst);
284 void visitGetElementPtrInst(GetElementPtrInst &Inst) {
285 auto *Op = Inst.getPointerOperand();
286 addAssignEdge(Op, &Inst);
289 void visitSelectInst(SelectInst &Inst) {
290 // Condition is not processed here (The actual statement producing
291 // the condition result is processed elsewhere). For select, the
292 // condition is evaluated, but not loaded, stored, or assigned
293 // simply as a result of being the condition of a select.
295 auto *TrueVal = Inst.getTrueValue();
296 auto *FalseVal = Inst.getFalseValue();
297 addAssignEdge(TrueVal, &Inst);
298 addAssignEdge(FalseVal, &Inst);
301 void visitAllocaInst(AllocaInst &Inst) { addNode(&Inst); }
303 void visitLoadInst(LoadInst &Inst) {
304 auto *Ptr = Inst.getPointerOperand();
306 addLoadEdge(Ptr, Val);
309 void visitStoreInst(StoreInst &Inst) {
310 auto *Ptr = Inst.getPointerOperand();
311 auto *Val = Inst.getValueOperand();
312 addStoreEdge(Val, Ptr);
315 void visitVAArgInst(VAArgInst &Inst) {
316 // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it
319 // 1. Loads a value from *((T*)*Ptr).
320 // 2. Increments (stores to) *Ptr by some target-specific amount.
321 // For now, we'll handle this like a landingpad instruction (by placing
323 // result in its own group, and having that group alias externals).
324 addNode(&Inst, getAttrUnknown());
327 static bool isFunctionExternal(Function *Fn) {
328 return !Fn->hasExactDefinition();
331 bool tryInterproceduralAnalysis(CallSite CS,
332 const SmallVectorImpl<Function *> &Fns) {
333 assert(Fns.size() > 0);
335 if (CS.arg_size() > MaxSupportedArgsInSummary)
338 // Exit early if we'll fail anyway
339 for (auto *Fn : Fns) {
340 if (isFunctionExternal(Fn) || Fn->isVarArg())
342 // Fail if the caller does not provide enough arguments
343 assert(Fn->arg_size() <= CS.arg_size());
344 if (!AA.getAliasSummary(*Fn))
348 for (auto *Fn : Fns) {
349 auto Summary = AA.getAliasSummary(*Fn);
350 assert(Summary != nullptr);
352 auto &RetParamRelations = Summary->RetParamRelations;
353 for (auto &Relation : RetParamRelations) {
354 auto IRelation = instantiateExternalRelation(Relation, CS);
355 if (IRelation.hasValue()) {
356 Graph.addNode(IRelation->From);
357 Graph.addNode(IRelation->To);
358 Graph.addEdge(IRelation->From, IRelation->To);
362 auto &RetParamAttributes = Summary->RetParamAttributes;
363 for (auto &Attribute : RetParamAttributes) {
364 auto IAttr = instantiateExternalAttribute(Attribute, CS);
365 if (IAttr.hasValue())
366 Graph.addNode(IAttr->IValue, IAttr->Attr);
373 void visitCallSite(CallSite CS) {
374 auto Inst = CS.getInstruction();
376 // Make sure all arguments and return value are added to the graph first
377 for (Value *V : CS.args())
378 if (V->getType()->isPointerTy())
380 if (Inst->getType()->isPointerTy())
383 // Check if Inst is a call to a library function that
384 // allocates/deallocates
385 // on the heap. Those kinds of functions do not introduce any aliases.
386 // TODO: address other common library functions such as realloc(),
389 if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) ||
390 isFreeCall(Inst, &TLI))
393 // TODO: Add support for noalias args/all the other fun function
395 // that we can tack on.
396 SmallVector<Function *, 4> Targets;
397 if (getPossibleTargets(CS, Targets))
398 if (tryInterproceduralAnalysis(CS, Targets))
401 // Because the function is opaque, we need to note that anything
402 // could have happened to the arguments (unless the function is marked
403 // readonly or readnone), and that the result could alias just about
404 // anything, too (unless the result is marked noalias).
405 if (!CS.onlyReadsMemory())
406 for (Value *V : CS.args()) {
407 if (V->getType()->isPointerTy()) {
408 // The argument itself escapes.
409 Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped());
410 // The fate of argument memory is unknown. Note that since
411 // AliasAttrs is transitive with respect to dereference, we only
412 // need to specify it for the first-level memory.
413 Graph.addNode(InstantiatedValue{V, 1}, getAttrUnknown());
417 if (Inst->getType()->isPointerTy()) {
418 auto *Fn = CS.getCalledFunction();
419 if (Fn == nullptr || !Fn->doesNotAlias(0))
420 // No need to call addNode() since we've added Inst at the
421 // beginning of this function and we know it is not a global.
422 Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown());
426 /// Because vectors/aggregates are immutable and unaddressable, there's
427 /// nothing we can do to coax a value out of them, other than calling
428 /// Extract{Element,Value}. We can effectively treat them as pointers to
429 /// arbitrary memory locations we can store in and load from.
430 void visitExtractElementInst(ExtractElementInst &Inst) {
431 auto *Ptr = Inst.getVectorOperand();
433 addLoadEdge(Ptr, Val);
436 void visitInsertElementInst(InsertElementInst &Inst) {
437 auto *Vec = Inst.getOperand(0);
438 auto *Val = Inst.getOperand(1);
439 addAssignEdge(Vec, &Inst);
440 addStoreEdge(Val, &Inst);
443 void visitLandingPadInst(LandingPadInst &Inst) {
444 // Exceptions come from "nowhere", from our analysis' perspective.
445 // So we place the instruction its own group, noting that said group may
447 addNode(&Inst, getAttrUnknown());
450 void visitInsertValueInst(InsertValueInst &Inst) {
451 auto *Agg = Inst.getOperand(0);
452 auto *Val = Inst.getOperand(1);
453 addAssignEdge(Agg, &Inst);
454 addStoreEdge(Val, &Inst);
457 void visitExtractValueInst(ExtractValueInst &Inst) {
458 auto *Ptr = Inst.getAggregateOperand();
459 addLoadEdge(Ptr, &Inst);
462 void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
463 auto *From1 = Inst.getOperand(0);
464 auto *From2 = Inst.getOperand(1);
465 addAssignEdge(From1, &Inst);
466 addAssignEdge(From2, &Inst);
469 void visitConstantExpr(ConstantExpr *CE) {
470 switch (CE->getOpcode()) {
472 llvm_unreachable("Unknown instruction type encountered!");
473 // Build the switch statement using the Instruction.def file.
474 #define HANDLE_INST(NUM, OPCODE, CLASS) \
475 case Instruction::OPCODE: \
476 this->visit##OPCODE(*(CLASS *)CE); \
478 #include "llvm/IR/Instruction.def"
485 // Determines whether or not we an instruction is useless to us (e.g.
487 static bool hasUsefulEdges(Instruction *Inst) {
488 bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) &&
489 !isa<InvokeInst>(Inst) &&
490 !isa<ReturnInst>(Inst);
491 return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) &&
492 !IsNonInvokeRetTerminator;
495 void addArgumentToGraph(Argument &Arg) {
496 if (Arg.getType()->isPointerTy()) {
497 Graph.addNode(InstantiatedValue{&Arg, 0},
498 getGlobalOrArgAttrFromValue(Arg));
499 // Pointees of a formal parameter is known to the caller
500 Graph.addNode(InstantiatedValue{&Arg, 1}, getAttrCaller());
504 // Given an Instruction, this will add it to the graph, along with any
505 // Instructions that are potentially only available from said Instruction
506 // For example, given the following line:
507 // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
508 // addInstructionToGraph would add both the `load` and `getelementptr`
509 // instructions to the graph appropriately.
510 void addInstructionToGraph(GetEdgesVisitor &Visitor, Instruction &Inst) {
511 if (!hasUsefulEdges(&Inst))
517 // Builds the graph needed for constructing the StratifiedSets for the given
519 void buildGraphFrom(Function &Fn) {
520 GetEdgesVisitor Visitor(*this);
522 for (auto &Bb : Fn.getBasicBlockList())
523 for (auto &Inst : Bb.getInstList())
524 addInstructionToGraph(Visitor, Inst);
526 for (auto &Arg : Fn.args())
527 addArgumentToGraph(Arg);
531 CFLGraphBuilder(CFLAA &Analysis, const TargetLibraryInfo &TLI, Function &Fn)
532 : Analysis(Analysis), TLI(TLI) {
536 const CFLGraph &getCFLGraph() const { return Graph; }
537 const SmallVector<Value *, 4> &getReturnValues() const {
538 return ReturnedValues;