1 //===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the TypeBasedAliasAnalysis pass, which implements
11 // metadata-based TBAA.
13 // In LLVM IR, memory does not have types, so LLVM's own type system is not
14 // suitable for doing TBAA. Instead, metadata is added to the IR to describe
15 // a type system of a higher level language. This can be used to implement
16 // typical C/C++ TBAA, but it can also be used to implement custom alias
17 // analysis behavior for other languages.
19 // We now support two types of metadata format: scalar TBAA and struct-path
20 // aware TBAA. After all testing cases are upgraded to use struct-path aware
21 // TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA can be dropped.
24 // The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
25 // three fields, e.g.:
26 // !0 = !{ !"an example type tree" }
27 // !1 = !{ !"int", !0 }
28 // !2 = !{ !"float", !0 }
29 // !3 = !{ !"const float", !2, i64 1 }
31 // The first field is an identity field. It can be any value, usually
32 // an MDString, which uniquely identifies the type. The most important
33 // name in the tree is the name of the root node. Two trees with
34 // different root node names are entirely disjoint, even if they
35 // have leaves with common names.
37 // The second field identifies the type's parent node in the tree, or
38 // is null or omitted for a root node. A type is considered to alias
39 // all of its descendants and all of its ancestors in the tree. Also,
40 // a type is considered to alias all types in other trees, so that
41 // bitcode produced from multiple front-ends is handled conservatively.
43 // If the third field is present, it's an integer which if equal to 1
44 // indicates that the type is "constant" (meaning pointsToConstantMemory
45 // should return true; see
46 // http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
48 // With struct-path aware TBAA, the MDNodes attached to an instruction using
49 // "!tbaa" are called path tag nodes.
51 // The path tag node has 4 fields with the last field being optional.
53 // The first field is the base type node, it can be a struct type node
54 // or a scalar type node. The second field is the access type node, it
55 // must be a scalar type node. The third field is the offset into the base type.
56 // The last field has the same meaning as the last field of our scalar TBAA:
57 // it's an integer which if equal to 1 indicates that the access is "constant".
59 // The struct type node has a name and a list of pairs, one pair for each member
60 // of the struct. The first element of each pair is a type node (a struct type
61 // node or a scalar type node), specifying the type of the member, the second
62 // element of each pair is the offset of the member.
73 // For an access to B.a.s, we attach !5 (a path tag node) to the load/store
74 // instruction. The base type is !4 (struct B), the access type is !2 (scalar
75 // type short) and the offset is 4.
77 // !0 = !{!"Simple C/C++ TBAA"}
78 // !1 = !{!"omnipotent char", !0} // Scalar type node
79 // !2 = !{!"short", !1} // Scalar type node
80 // !3 = !{!"A", !2, i64 0} // Struct type node
81 // !4 = !{!"B", !2, i64 0, !3, i64 4}
82 // // Struct type node
83 // !5 = !{!4, !2, i64 4} // Path tag node
85 // The struct type nodes and the scalar type nodes form a type DAG.
87 // char (!1) -- edge to Root
88 // short (!2) -- edge to char
89 // A (!3) -- edge with offset 0 to short
90 // B (!4) -- edge with offset 0 to short and edge with offset 4 to A
92 // To check if two tags (tagX and tagY) can alias, we start from the base type
93 // of tagX, follow the edge with the correct offset in the type DAG and adjust
94 // the offset until we reach the base type of tagY or until we reach the Root node.
96 // If we reach the base type of tagY, compare the adjusted offset with
97 // offset of tagY, return Alias if the offsets are the same, return NoAlias otherwise.
99 // If we reach the Root node, perform the above starting from base type of tagY
100 // to see if we reach base type of tagX.
102 // If they have different roots, they're part of different potentially
103 // unrelated type systems, so we return Alias to be conservative.
104 // If neither node is an ancestor of the other and they have the same root,
105 // then we say NoAlias.
107 // TODO: The current metadata format doesn't support struct
108 // fields. For example:
//   struct X {
//     double d;
//   };
113 // void foo(struct X *x, struct X *y, double *p) {
//     *x = *y;
//     *p = 0.0;
//   }
117 // Struct X has a double member, so the store to *x can alias the store to *p.
118 // Currently it's not possible to precisely describe all the things struct X
119 // aliases, so struct assignments must use conservative TBAA nodes. There's
120 // no scheme for attaching metadata to @llvm.memcpy yet either.
122 //===----------------------------------------------------------------------===//
124 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
125 #include "llvm/ADT/SetVector.h"
126 #include "llvm/Analysis/AliasAnalysis.h"
127 #include "llvm/Analysis/MemoryLocation.h"
128 #include "llvm/IR/Constants.h"
129 #include "llvm/IR/DerivedTypes.h"
130 #include "llvm/IR/Instruction.h"
131 #include "llvm/IR/LLVMContext.h"
132 #include "llvm/IR/Metadata.h"
133 #include "llvm/Pass.h"
134 #include "llvm/Support/Casting.h"
135 #include "llvm/Support/CommandLine.h"
136 #include "llvm/Support/ErrorHandling.h"
140 using namespace llvm;
142 // A handy option for disabling TBAA functionality. The same effect can also be
143 // achieved by stripping the !tbaa tags from IR, but this option is sometimes
// more convenient. The option is spelled "enable-tbaa", is initialised to
// true, and is registered with cl::Hidden.
145 static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true), cl::Hidden);
149 /// This is a simple wrapper around an MDNode which provides a higher-level
150 /// interface by hiding the details of how alias analysis information is encoded
/// in its operands. As read by the accessors below, operand 1 is treated as
/// the parent node and operand 2 as an optional integer flag.
152 template<typename MDNodeTy>
// The wrapped metadata node; stays null when the wrapper is default-constructed.
154 MDNodeTy *Node = nullptr;
// Build an empty wrapper that refers to no node.
157 TBAANodeImpl() = default;
// Wrap the given node.
158 explicit TBAANodeImpl(MDNodeTy *N) : Node(N) {}
160 /// getNode - Get the MDNode for this TBAANode.
161 MDNodeTy *getNode() const { return Node; }
163 /// getParent - Get this TBAANode's Alias tree parent.
/// A node with fewer than two operands has no parent entry, so an empty
/// wrapper is returned for it. Otherwise operand 1 is converted with
/// dyn_cast_or_null<MDNodeTy> and, when usable, returned in a new wrapper.
164 TBAANodeImpl<MDNodeTy> getParent() const {
165 if (Node->getNumOperands() < 2)
166 return TBAANodeImpl<MDNodeTy>();
167 MDNodeTy *P = dyn_cast_or_null<MDNodeTy>(Node->getOperand(1));
// NOTE(review): the condition guarding the next return is not visible in this
// listing -- presumably a null check on P; confirm against the full file.
169 return TBAANodeImpl<MDNodeTy>();
170 // Ok, this node has a valid parent. Return it.
171 return TBAANodeImpl<MDNodeTy>(P);
174 /// Test if this TBAANode represents a type for objects which are
175 /// not modified (by any means) in the context where this
176 /// AliasAnalysis is relevant.
/// The flag is stored in operand 2, so the operand count is checked before it
/// is read; the value returned on the last line is bit 0 of that integer.
177 bool isTypeImmutable() const {
178 if (Node->getNumOperands() < 3)
180 ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
183 return CI->getValue()[0];
187 /// \name Specializations of \c TBAANodeImpl for const and non const qualified
/// \c MDNode.
// Wrapper over a node that is only read.
190 using TBAANode = TBAANodeImpl<const MDNode>;
// Wrapper over a node through a non-const pointer.
191 using MutableTBAANode = TBAANodeImpl<MDNode>;
194 /// This is a simple wrapper around an MDNode which provides a
195 /// higher-level interface by hiding the details of how alias analysis
196 /// information is encoded in its operands.
/// As read by the accessors below, the operands are: 0 = base type node,
/// 1 = access type node, 2 = offset (integer), 3 = optional integer flag.
197 template<typename MDNodeTy>
198 class TBAAStructTagNodeImpl {
199 /// This node should be created with createTBAAStructTagNode.
// Wrap the given tag node.
203 explicit TBAAStructTagNodeImpl(MDNodeTy *N) : Node(N) {}
205 /// Get the MDNode for this TBAAStructTagNode.
206 MDNodeTy *getNode() const { return Node; }
/// Get the base type of this tag: operand 0, or null if that operand is
/// absent or is not an MDNode.
208 MDNodeTy *getBaseType() const {
209 return dyn_cast_or_null<MDNode>(Node->getOperand(0));
/// Get the access type of this tag: operand 1, or null if that operand is
/// absent or is not an MDNode.
212 MDNodeTy *getAccessType() const {
213 return dyn_cast_or_null<MDNode>(Node->getOperand(1));
/// Get the offset stored in operand 2, zero-extended to 64 bits. The operand
/// is read with mdconst::extract, so it is required to be a ConstantInt.
216 uint64_t getOffset() const {
217 return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
220 /// Test if this TBAAStructTagNode represents a type for objects
221 /// which are not modified (by any means) in the context where this
222 /// AliasAnalysis is relevant.
/// The flag is stored in operand 3, so the operand count is checked before it
/// is read; the value returned on the last line is bit 0 of that integer.
223 bool isTypeImmutable() const {
224 if (Node->getNumOperands() < 4)
226 ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3));
229 return CI->getValue()[0];
233 /// \name Specializations of \c TBAAStructTagNodeImpl for const and non const
234 /// qualified \c MDNode.
// Wrapper over a tag node that is only read.
236 using TBAAStructTagNode = TBAAStructTagNodeImpl<const MDNode>;
// Wrapper over a tag node through a non-const pointer.
237 using MutableTBAAStructTagNode = TBAAStructTagNodeImpl<MDNode>;
240 /// This is a simple wrapper around an MDNode which provides a
241 /// higher-level interface by hiding the details of how alias analysis
242 /// information is encoded in its operands.
/// As read by getParent() below, operands from index 1 onwards come in
/// (type node, offset) pairs; a two-operand node carries a type node only.
243 class TBAAStructTypeNode {
244 /// This node should be created with createTBAAStructTypeNode.
245 const MDNode *Node = nullptr;
// Build an empty wrapper that refers to no node.
248 TBAAStructTypeNode() = default;
// Wrap the given type node.
249 explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
251 /// Get the MDNode for this TBAAStructTypeNode.
252 const MDNode *getNode() const { return Node; }
254 /// Get this TBAAStructTypeNode's field in the type DAG with
255 /// given offset. Update the offset to be relative to the field type.
/// \p Offset is passed by reference and serves as both input and output.
/// An empty wrapper is returned for a node with fewer than two operands.
/// NOTE(review): several statements of this function (the offset updates and
/// the conditions guarding some returns) are not visible in this listing;
/// confirm the details against the full file.
256 TBAAStructTypeNode getParent(uint64_t &Offset) const {
257 // Parent can be omitted for the root node.
258 if (Node->getNumOperands() < 2)
259 return TBAAStructTypeNode();
261 // Fast path for a scalar type node and a struct type node with a single
// field.
263 if (Node->getNumOperands() <= 3) {
264 uint64_t Cur = Node->getNumOperands() == 2
266 : mdconst::extract<ConstantInt>(Node->getOperand(2))
269 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
271 return TBAAStructTypeNode();
272 return TBAAStructTypeNode(P);
275 // Assume the offsets are in order. We return the previous field if
276 // the current offset is bigger than the given offset.
// Idx steps over the type-node operands; the matching offset sits at Idx + 1.
278 for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
279 uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1))
283 "TBAAStructTypeNode::getParent should have an offset match!");
288 // Move along the last field.
// The last (type node, offset) pair starts two operands before the end.
290 TheIdx = Node->getNumOperands() - 2;
291 uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1))
294 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
296 return TBAAStructTypeNode();
297 return TBAAStructTypeNode(P);
301 } // end anonymous namespace
303 /// Check the first operand of the tbaa tag node, if it is a MDNode, we treat
304 /// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA
/// format. A node must also have at least three operands to be classified as
/// a struct-path tag.
///
/// \param MD the !tbaa node to classify; must not be null.
/// \returns true if \p MD has three or more operands and operand 0 is an
/// MDNode, false otherwise.
306 static bool isStructPathTBAA(const MDNode *MD) {
307 // Anonymous TBAA root starts with a MDNode and dragonegg uses it as
// a TBAA tag, so looking at operand 0 alone cannot tell the two formats
// apart. The operand count is tested first so that operand 0 is never read
// from a node that has no operands.
309 return MD->getNumOperands() >= 3 && isa<MDNode>(MD->getOperand(0));
/// Answer an alias query for two memory locations from their TBAA tags
/// (the AATags.TBAA field of each location). When Aliases() reports that the
/// tags may alias, the query is handed to AAResultBase::alias.
// NOTE(review): the condition guarding the first delegation below is not
// visible in this listing (presumably the EnableTBAA option) -- confirm.
312 AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA,
313 const MemoryLocation &LocB) {
315 return AAResultBase::alias(LocA, LocB);
317 // If accesses may alias, chain to the next AliasAnalysis.
318 if (Aliases(LocA.AATags.TBAA, LocB.AATags.TBAA))
319 return AAResultBase::alias(LocA, LocB);
321 // Otherwise return a definitive result.
/// Decide whether \p Loc refers to constant memory using its TBAA tag.
/// The tag is taken from Loc.AATags.TBAA and its immutability flag is read
/// through TBAANode for scalar-format tags or TBAAStructTagNode for
/// struct-path tags. Every other visible path defers to
/// AAResultBase::pointsToConstantMemory with the same arguments.
// NOTE(review): the conditions guarding the first two delegations and the
// statement taken when the flag is set are not visible in this listing.
325 bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
328 return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
330 const MDNode *M = Loc.AATags.TBAA;
332 return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
334 // If this is an "immutable" type, we can assume the pointer is pointing
335 // to constant memory.
336 if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
337 (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
340 return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
/// Compute the mod/ref behavior of a call site from the !tbaa metadata on the
/// call instruction. The result is the behavior reported by AAResultBase
/// ANDed with a local bound: FMRB_UnknownModRefBehavior by default, narrowed
/// to FMRB_OnlyReadsMemory when the tag is marked immutable.
// NOTE(review): the condition guarding the early delegation below is not
// visible in this listing (presumably the EnableTBAA option) -- confirm.
343 FunctionModRefBehavior
344 TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) {
346 return AAResultBase::getModRefBehavior(CS);
348 FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
350 // If this is an "immutable" type, we can assume the call doesn't write
// to memory.
352 if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
353 if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
354 (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
355 Min = FMRB_OnlyReadsMemory;
357 return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
/// Function-level overload: this analysis adds nothing here and forwards the
/// query unchanged to AAResultBase::getModRefBehavior.
360 FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
361 // Functions don't have metadata. Just chain to the next implementation.
362 return AAResultBase::getModRefBehavior(F);
/// Mod/ref query between a call site and a memory location. It looks at the
/// TBAA tag of \p Loc and the !tbaa metadata on the call instruction; when
/// both are present the query can be answered with ModRefInfo::NoModRef,
/// otherwise it is forwarded to AAResultBase::getModRefInfo.
// NOTE(review): the EnableTBAA-style guard on the first delegation and the
// condition directly guarding the NoModRef return are not visible in this
// listing -- presumably the latter is a negated Aliases(L, M) check; confirm.
365 ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS,
366 const MemoryLocation &Loc) {
368 return AAResultBase::getModRefInfo(CS, Loc);
370 if (const MDNode *L = Loc.AATags.TBAA)
371 if (const MDNode *M =
372 CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
374 return ModRefInfo::NoModRef;
376 return AAResultBase::getModRefInfo(CS, Loc);
/// Mod/ref query between two call sites. If both call instructions carry
/// !tbaa metadata and Aliases() reports that the two tags cannot alias, the
/// answer is ModRefInfo::NoModRef; otherwise the query is forwarded to
/// AAResultBase::getModRefInfo.
// NOTE(review): the condition guarding the early delegation below is not
// visible in this listing (presumably the EnableTBAA option) -- confirm.
379 ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1,
380 ImmutableCallSite CS2) {
382 return AAResultBase::getModRefInfo(CS1, CS2);
384 if (const MDNode *M1 =
385 CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
386 if (const MDNode *M2 =
387 CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
388 if (!Aliases(M1, M2))
389 return ModRefInfo::NoModRef;
391 return AAResultBase::getModRefInfo(CS1, CS2);
/// Test whether this TBAA node names the "vtable pointer" type.
/// For a scalar-format node (isStructPathTBAA is false) the MDString in
/// operand 0 of this node is compared; for a struct-path tag the MDString in
/// operand 0 of the access type (operand 1 of this node) is compared.
// NOTE(review): the return statements of this function are not visible in
// this listing; confirm the results on each path against the full file.
394 bool MDNode::isTBAAVtableAccess() const {
395 if (!isStructPathTBAA(this)) {
396 if (getNumOperands() < 1)
398 if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) {
399 if (Tag1->getString() == "vtable pointer")
405 // For struct-path aware TBAA, we use the access type of the tag.
406 if (getNumOperands() < 2)
408 MDNode *Tag = cast_or_null<MDNode>(getOperand(1));
411 if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
412 if (Tag1->getString() == "vtable pointer")
// Forward declaration so the function can be called before its definition
// later in the file. The output parameter GenericTag is optional and
// defaults to nullptr.
418 static bool matchAccessTags(const MDNode *A, const MDNode *B,
419 const MDNode **GenericTag = nullptr);
/// Return the generic access tag that matchAccessTags() computes for \p A and
/// \p B. The boolean result of matchAccessTags() is ignored; only the tag it
/// writes through its output parameter is used, with constness cast away to
/// fit the return type.
421 MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
422 const MDNode *GenericTag;
423 matchAccessTags(A, B, &GenericTag);
424 return const_cast<MDNode*>(GenericTag);
/// Find a type node shared by the ancestor chains of \p A and \p B.
/// Two paths of nodes are collected, one per input, into SmallSetVectors;
/// meeting a node that is already on the path is reported through
/// report_fatal_error as a cycle. The two paths are then compared index by
/// index starting from their last elements.
// NOTE(review): the declarations of TA/TB, the statements that advance them,
// and the tail of the comparison loop are not visible in this listing --
// presumably TA/TB walk parent links and the result is the deepest node the
// two paths share; confirm against the full file.
427 static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) {
434 SmallSetVector<const MDNode *, 4> PathA;
436 while (TA.getNode()) {
437 if (PathA.count(TA.getNode()))
438 report_fatal_error("Cycle found in TBAA metadata.");
439 PathA.insert(TA.getNode());
443 SmallSetVector<const MDNode *, 4> PathB;
445 while (TB.getNode()) {
446 if (PathB.count(TB.getNode()))
447 report_fatal_error("Cycle found in TBAA metadata.");
448 PathB.insert(TB.getNode());
// Start both cursors at the last element of their path.
452 int IA = PathA.size() - 1;
453 int IB = PathB.size() - 1;
455 const MDNode *Ret = nullptr;
456 while (IA >= 0 && IB >= 0) {
457 if (PathA[IA] == PathB[IB])
/// Fill \p N from this instruction's MD_tbaa, MD_alias_scope and MD_noalias
/// metadata. Each of the three fields has two visible forms: one that
/// combines the value already in \p N with the instruction's metadata
/// (getMostGenericTBAA, getMostGenericAliasScope, intersect) and one that
/// overwrites the field with the instruction's metadata.
// NOTE(review): the branch conditions choosing between the two forms are not
// visible in this listing -- presumably \p Merge selects the combining form;
// confirm against the full file.
468 void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
471 MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa));
473 N.TBAA = getMetadata(LLVMContext::MD_tbaa);
476 N.Scope = MDNode::getMostGenericAliasScope(
477 N.Scope, getMetadata(LLVMContext::MD_alias_scope));
479 N.Scope = getMetadata(LLVMContext::MD_alias_scope);
483 MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias));
485 N.NoAlias = getMetadata(LLVMContext::MD_noalias);
/// Walk the type DAG from the base type of \p BaseTag looking for
/// \p AccessTypeNode.
/// \p OffsetInBase is an output parameter: it is first set to the offset of
/// \p BaseTag and is then passed by reference to every getParent() step. The
/// walk stops once the current type node is null.
// NOTE(review): the return statements are not visible in this listing --
// presumably true when the node is found and false once the walk runs out;
// confirm against the full file.
488 static bool findAccessType(TBAAStructTagNode BaseTag,
489 const MDNode *AccessTypeNode,
490 uint64_t &OffsetInBase) {
491 // Start from the base type, follow the edge with the correct offset in
492 // the type DAG and adjust the offset until we reach the access type or
493 // until we reach a root node.
494 TBAAStructTypeNode BaseType(BaseTag.getBaseType());
495 OffsetInBase = BaseTag.getOffset();
497 while (const MDNode *BaseTypeNode = BaseType.getNode()) {
498 if (BaseTypeNode == AccessTypeNode)
501 // Follow the edge with the correct offset, Offset will be adjusted to
502 // be relative to the field type.
503 BaseType = BaseType.getParent(OffsetInBase);
/// Build a tag node for \p AccessType with three operands: the access type
/// twice (it appears in both the base-type and access-type positions),
/// followed by a 64-bit integer constant 0. The node is obtained from
/// MDNode::get in the context of \p AccessType.
// NOTE(review): the constant named ImmutabilityFlag lands in operand 2, which
// TBAAStructTagNodeImpl::getOffset() reads as the offset -- confirm whether
// the name or the operand position reflects the intent.
508 static const MDNode *createAccessTag(const MDNode *AccessType) {
509 // If there is no access type or the access type is the root node, then
510 // we don't have any useful access tag to return.
511 if (!AccessType || AccessType->getNumOperands() < 2)
514 Type *Int64 = IntegerType::get(AccessType->getContext(), 64);
515 auto *ImmutabilityFlag = ConstantAsMetadata::get(ConstantInt::get(Int64, 0));
516 Metadata *Ops[] = {const_cast<MDNode*>(AccessType),
517 const_cast<MDNode*>(AccessType), ImmutabilityFlag};
518 return MDNode::get(AccessType->getContext(), Ops);
521 /// matchAccessTags - Return true if the given couple of accesses are allowed to
522 /// overlap. If \arg GenericTag is not null, then on return it points to the
523 /// most generic access descriptor for the given two.
/// Both tags are asserted to be in struct-path form. The common type passed
/// to createAccessTag() comes from getLeastCommonType() on the two access
/// types.
// NOTE(review): several guards (null checks on A, B and GenericTag, the
// declarations of OffsetA/OffsetB and the final returns) are not visible in
// this listing; confirm them against the full file.
524 static bool matchAccessTags(const MDNode *A, const MDNode *B,
525 const MDNode **GenericTag) {
532 // Accesses with no TBAA information may alias with any other accesses.
535 *GenericTag = nullptr;
539 // Verify that both input nodes are struct-path aware. Auto-upgrade should
540 // have taken care of this.
541 assert(isStructPathTBAA(A) && "Access A is not struct-path aware!");
542 assert(isStructPathTBAA(B) && "Access B is not struct-path aware!");
544 TBAAStructTagNode TagA(A), TagB(B);
545 const MDNode *CommonType = getLeastCommonType(TagA.getAccessType(),
546 TagB.getAccessType());
548 // TODO: We need to check if AccessType of TagA encloses AccessType of
549 // TagB to support aggregate AccessType. If yes, return true.
551 // Climb the type DAG from base type of A to see if we reach base type of B.
// When B's base type is reached, the accesses overlap only if the offset
// adjusted along the way equals B's own offset; the generic tag is then B's
// tag, otherwise a fresh tag built from the common type.
553 if (findAccessType(TagA, TagB.getBaseType(), OffsetA)) {
554 bool SameMemberAccess = OffsetA == TagB.getOffset();
556 *GenericTag = SameMemberAccess ? TagB.getNode() :
557 createAccessTag(CommonType);
558 return SameMemberAccess;
561 // Climb the type DAG from base type of B to see if we reach base type of A.
// Mirror image of the case above, with the roles of A and B swapped.
563 if (findAccessType(TagB, TagA.getBaseType(), OffsetB)) {
564 bool SameMemberAccess = OffsetB == TagA.getOffset();
566 *GenericTag = SameMemberAccess ? TagA.getNode() :
567 createAccessTag(CommonType);
568 return SameMemberAccess;
// Neither base type was reached from the other: fall back to a tag built
// from the common type.
572 *GenericTag = createAccessTag(CommonType);
574 // If the final access types have different roots, they're part of different
575 // potentially unrelated type systems, so we must be conservative.
579 // If they have the same root, then we've proved there's no alias.
583 /// Aliases - Test whether the access represented by tag A may alias the
584 /// access represented by tag B.
/// Thin wrapper over matchAccessTags() that does not ask for the generic tag.
585 bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const {
586 return matchAccessTags(A, B);
// Out-of-line definition of the static AnalysisKey member of TypeBasedAA.
589 AnalysisKey TypeBasedAA::Key;
/// Produce the analysis result. Neither \p F nor \p AM is consulted: a
/// default-constructed TypeBasedAAResult is returned.
591 TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) {
592 return TypeBasedAAResult();
// Definition of the static ID member of the wrapper pass, followed by the
// INITIALIZE_PASS registration that names the pass "tbaa" and describes it as
// "Type-Based Alias Analysis".
595 char TypeBasedAAWrapperPass::ID = 0;
596 INITIALIZE_PASS(TypeBasedAAWrapperPass, "tbaa", "Type-Based Alias Analysis",
/// Factory for the wrapper pass: returns a newly allocated
/// TypeBasedAAWrapperPass as an ImmutablePass pointer.
599 ImmutablePass *llvm::createTypeBasedAAWrapperPass() {
600 return new TypeBasedAAWrapperPass();
/// Construct the wrapper pass with its ID and run its initializer against the
/// pass registry returned by PassRegistry::getPassRegistry().
603 TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) {
604 initializeTypeBasedAAWrapperPassPass(*PassRegistry::getPassRegistry());
/// Module initialization hook: replaces the held Result with a freshly
/// constructed TypeBasedAAResult. \p M is not used by the visible code.
// NOTE(review): the return statement is not visible in this listing.
607 bool TypeBasedAAWrapperPass::doInitialization(Module &M) {
608 Result.reset(new TypeBasedAAResult());
/// Module finalization hook of the wrapper pass.
// NOTE(review): the body of this function is not visible in this listing;
// confirm what it releases and returns against the full file.
612 bool TypeBasedAAWrapperPass::doFinalization(Module &M) {
/// Declare this pass's analysis usage: it marks all analyses as preserved.
617 void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
618 AU.setPreservesAll();