//===- StackSafetyAnalysis.cpp - Stack memory safety analysis -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // //===----------------------------------------------------------------------===// #include "llvm/Analysis/StackSafetyAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "stack-safety" static cl::opt StackSafetyMaxIterations("stack-safety-max-iterations", cl::init(20), cl::Hidden); namespace { /// Rewrite an SCEV expression for a memory access address to an expression that /// represents offset from the given alloca. class AllocaOffsetRewriter : public SCEVRewriteVisitor { const Value *AllocaPtr; public: AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr) : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {} const SCEV *visit(const SCEV *Expr) { // Only re-write the expression if the alloca is used in an addition // expression (it can be used in other types of expressions if it's cast to // an int and passed as an argument.) if (!isa(Expr) && !isa(Expr) && !isa(Expr)) return Expr; return SCEVRewriteVisitor::visit(Expr); } const SCEV *visitUnknown(const SCEVUnknown *Expr) { // FIXME: look through one or several levels of definitions? // This can be inttoptr(AllocaPtr) and SCEV would not unwrap // it for us. if (Expr->getValue() == AllocaPtr) return SE.getZero(Expr->getType()); return Expr; } }; /// Describes use of address in as a function call argument. struct PassAsArgInfo { /// Function being called. const GlobalValue *Callee = nullptr; /// Index of argument which pass address. size_t ParamNo = 0; // Offset range of address from base address (alloca or calling function // argument). // Range should never set to empty-set, that is an invalid access range // that can cause empty-set to be propagated with ConstantRange::add ConstantRange Offset; PassAsArgInfo(const GlobalValue *Callee, size_t ParamNo, ConstantRange Offset) : Callee(Callee), ParamNo(ParamNo), Offset(Offset) {} StringRef getName() const { return Callee->getName(); } }; raw_ostream &operator<<(raw_ostream &OS, const PassAsArgInfo &P) { return OS << "@" << P.getName() << "(arg" << P.ParamNo << ", " << P.Offset << ")"; } /// Describe uses of address (alloca or parameter) inside of the function. struct UseInfo { // Access range if the address (alloca or parameters). // It is allowed to be empty-set when there are no known accesses. ConstantRange Range; // List of calls which pass address as an argument. SmallVector Calls; explicit UseInfo(unsigned PointerSize) : Range{PointerSize, false} {} void updateRange(ConstantRange R) { Range = Range.unionWith(R); } }; raw_ostream &operator<<(raw_ostream &OS, const UseInfo &U) { OS << U.Range; for (auto &Call : U.Calls) OS << ", " << Call; return OS; } struct AllocaInfo { const AllocaInst *AI = nullptr; uint64_t Size = 0; UseInfo Use; AllocaInfo(unsigned PointerSize, const AllocaInst *AI, uint64_t Size) : AI(AI), Size(Size), Use(PointerSize) {} StringRef getName() const { return AI->getName(); } }; raw_ostream &operator<<(raw_ostream &OS, const AllocaInfo &A) { return OS << A.getName() << "[" << A.Size << "]: " << A.Use; } struct ParamInfo { const Argument *Arg = nullptr; UseInfo Use; explicit ParamInfo(unsigned PointerSize, const Argument *Arg) : Arg(Arg), Use(PointerSize) {} StringRef getName() const { return Arg ? Arg->getName() : ""; } }; raw_ostream &operator<<(raw_ostream &OS, const ParamInfo &P) { return OS << P.getName() << "[]: " << P.Use; } /// Calculate the allocation size of a given alloca. Returns 0 if the /// size can not be statically determined. uint64_t getStaticAllocaAllocationSize(const AllocaInst *AI) { const DataLayout &DL = AI->getModule()->getDataLayout(); uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType()); if (AI->isArrayAllocation()) { auto C = dyn_cast(AI->getArraySize()); if (!C) return 0; Size *= C->getZExtValue(); } return Size; } } // end anonymous namespace /// Describes uses of allocas and parameters inside of a single function. struct StackSafetyInfo::FunctionInfo { // May be a Function or a GlobalAlias const GlobalValue *GV = nullptr; // Informations about allocas uses. SmallVector Allocas; // Informations about parameters uses. SmallVector Params; // TODO: describe return value as depending on one or more of its arguments. // StackSafetyDataFlowAnalysis counter stored here for faster access. int UpdateCount = 0; FunctionInfo(const StackSafetyInfo &SSI) : FunctionInfo(*SSI.Info) {} explicit FunctionInfo(const Function *F) : GV(F){}; // Creates FunctionInfo that forwards all the parameters to the aliasee. explicit FunctionInfo(const GlobalAlias *A); FunctionInfo(FunctionInfo &&) = default; bool IsDSOLocal() const { return GV->isDSOLocal(); }; bool IsInterposable() const { return GV->isInterposable(); }; StringRef getName() const { return GV->getName(); } void print(raw_ostream &O) const { // TODO: Consider different printout format after // StackSafetyDataFlowAnalysis. Calls and parameters are irrelevant then. O << " @" << getName() << (IsDSOLocal() ? "" : " dso_preemptable") << (IsInterposable() ? " interposable" : "") << "\n"; O << " args uses:\n"; for (auto &P : Params) O << " " << P << "\n"; O << " allocas uses:\n"; for (auto &AS : Allocas) O << " " << AS << "\n"; } private: FunctionInfo(const FunctionInfo &) = default; }; StackSafetyInfo::FunctionInfo::FunctionInfo(const GlobalAlias *A) : GV(A) { unsigned PointerSize = A->getParent()->getDataLayout().getPointerSizeInBits(); const GlobalObject *Aliasee = A->getBaseObject(); const FunctionType *Type = cast(Aliasee->getValueType()); // 'Forward' all parameters to this alias to the aliasee for (unsigned ArgNo = 0; ArgNo < Type->getNumParams(); ArgNo++) { Params.emplace_back(PointerSize, nullptr); UseInfo &US = Params.back().Use; US.Calls.emplace_back(Aliasee, ArgNo, ConstantRange(APInt(PointerSize, 0))); } } namespace { class StackSafetyLocalAnalysis { const Function &F; const DataLayout &DL; ScalarEvolution &SE; unsigned PointerSize = 0; const ConstantRange UnknownRange; ConstantRange offsetFromAlloca(Value *Addr, const Value *AllocaPtr); ConstantRange getAccessRange(Value *Addr, const Value *AllocaPtr, uint64_t AccessSize); ConstantRange getMemIntrinsicAccessRange(const MemIntrinsic *MI, const Use &U, const Value *AllocaPtr); bool analyzeAllUses(const Value *Ptr, UseInfo &AS); ConstantRange getRange(uint64_t Lower, uint64_t Upper) const { return ConstantRange(APInt(PointerSize, Lower), APInt(PointerSize, Upper)); } public: StackSafetyLocalAnalysis(const Function &F, ScalarEvolution &SE) : F(F), DL(F.getParent()->getDataLayout()), SE(SE), PointerSize(DL.getPointerSizeInBits()), UnknownRange(PointerSize, true) {} // Run the transformation on the associated function. StackSafetyInfo run(); }; ConstantRange StackSafetyLocalAnalysis::offsetFromAlloca(Value *Addr, const Value *AllocaPtr) { if (!SE.isSCEVable(Addr->getType())) return UnknownRange; AllocaOffsetRewriter Rewriter(SE, AllocaPtr); const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr)); ConstantRange Offset = SE.getUnsignedRange(Expr).zextOrTrunc(PointerSize); assert(!Offset.isEmptySet()); return Offset; } ConstantRange StackSafetyLocalAnalysis::getAccessRange(Value *Addr, const Value *AllocaPtr, uint64_t AccessSize) { if (!SE.isSCEVable(Addr->getType())) return UnknownRange; AllocaOffsetRewriter Rewriter(SE, AllocaPtr); const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr)); ConstantRange AccessStartRange = SE.getUnsignedRange(Expr).zextOrTrunc(PointerSize); ConstantRange SizeRange = getRange(0, AccessSize); ConstantRange AccessRange = AccessStartRange.add(SizeRange); assert(!AccessRange.isEmptySet()); return AccessRange; } ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange( const MemIntrinsic *MI, const Use &U, const Value *AllocaPtr) { if (auto MTI = dyn_cast(MI)) { if (MTI->getRawSource() != U && MTI->getRawDest() != U) return getRange(0, 1); } else { if (MI->getRawDest() != U) return getRange(0, 1); } const auto *Len = dyn_cast(MI->getLength()); // Non-constant size => unsafe. FIXME: try SCEV getRange. if (!Len) return UnknownRange; ConstantRange AccessRange = getAccessRange(U, AllocaPtr, Len->getZExtValue()); return AccessRange; } /// The function analyzes all local uses of Ptr (alloca or argument) and /// calculates local access range and all function calls where it was used. bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) { SmallPtrSet Visited; SmallVector WorkList; WorkList.push_back(Ptr); // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc. while (!WorkList.empty()) { const Value *V = WorkList.pop_back_val(); for (const Use &UI : V->uses()) { auto I = cast(UI.getUser()); assert(V == UI.get()); switch (I->getOpcode()) { case Instruction::Load: { US.updateRange( getAccessRange(UI, Ptr, DL.getTypeStoreSize(I->getType()))); break; } case Instruction::VAArg: // "va-arg" from a pointer is safe. break; case Instruction::Store: { if (V == I->getOperand(0)) { // Stored the pointer - conservatively assume it may be unsafe. US.updateRange(UnknownRange); return false; } US.updateRange(getAccessRange( UI, Ptr, DL.getTypeStoreSize(I->getOperand(0)->getType()))); break; } case Instruction::Ret: // Information leak. // FIXME: Process parameters correctly. This is a leak only if we return // alloca. US.updateRange(UnknownRange); return false; case Instruction::Call: case Instruction::Invoke: { ImmutableCallSite CS(I); if (I->isLifetimeStartOrEnd()) break; if (const MemIntrinsic *MI = dyn_cast(I)) { US.updateRange(getMemIntrinsicAccessRange(MI, UI, Ptr)); break; } // FIXME: consult devirt? // Do not follow aliases, otherwise we could inadvertently follow // dso_preemptable aliases or aliases with interposable linkage. const GlobalValue *Callee = dyn_cast(CS.getCalledValue()->stripPointerCasts()); if (!Callee) { US.updateRange(UnknownRange); return false; } assert(isa(Callee) || isa(Callee)); ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); for (ImmutableCallSite::arg_iterator A = B; A != E; ++A) { if (A->get() == V) { ConstantRange Offset = offsetFromAlloca(UI, Ptr); US.Calls.emplace_back(Callee, A - B, Offset); } } break; } default: if (Visited.insert(I).second) WorkList.push_back(cast(I)); } } } return true; } StackSafetyInfo StackSafetyLocalAnalysis::run() { StackSafetyInfo::FunctionInfo Info(&F); assert(!F.isDeclaration() && "Can't run StackSafety on a function declaration"); LLVM_DEBUG(dbgs() << "[StackSafety] " << F.getName() << "\n"); for (auto &I : instructions(F)) { if (auto AI = dyn_cast(&I)) { Info.Allocas.emplace_back(PointerSize, AI, getStaticAllocaAllocationSize(AI)); AllocaInfo &AS = Info.Allocas.back(); analyzeAllUses(AI, AS.Use); } } for (const Argument &A : make_range(F.arg_begin(), F.arg_end())) { Info.Params.emplace_back(PointerSize, &A); ParamInfo &PS = Info.Params.back(); analyzeAllUses(&A, PS.Use); } LLVM_DEBUG(dbgs() << "[StackSafety] done\n"); LLVM_DEBUG(Info.print(dbgs())); return StackSafetyInfo(std::move(Info)); } class StackSafetyDataFlowAnalysis { using FunctionMap = std::map; FunctionMap Functions; // Callee-to-Caller multimap. DenseMap> Callers; SetVector WorkList; unsigned PointerSize = 0; const ConstantRange UnknownRange; ConstantRange getArgumentAccessRange(const GlobalValue *Callee, unsigned ParamNo) const; bool updateOneUse(UseInfo &US, bool UpdateToFullSet); void updateOneNode(const GlobalValue *Callee, StackSafetyInfo::FunctionInfo &FS); void updateOneNode(const GlobalValue *Callee) { updateOneNode(Callee, Functions.find(Callee)->second); } void updateAllNodes() { for (auto &F : Functions) updateOneNode(F.first, F.second); } void runDataFlow(); #ifndef NDEBUG void verifyFixedPoint(); #endif public: StackSafetyDataFlowAnalysis( Module &M, std::function FI); StackSafetyGlobalInfo run(); }; StackSafetyDataFlowAnalysis::StackSafetyDataFlowAnalysis( Module &M, std::function FI) : PointerSize(M.getDataLayout().getPointerSizeInBits()), UnknownRange(PointerSize, true) { // Without ThinLTO, run the local analysis for every function in the TU and // then run the DFA. for (auto &F : M.functions()) if (!F.isDeclaration()) Functions.emplace(&F, FI(F)); for (auto &A : M.aliases()) if (isa(A.getBaseObject())) Functions.emplace(&A, StackSafetyInfo::FunctionInfo(&A)); } ConstantRange StackSafetyDataFlowAnalysis::getArgumentAccessRange(const GlobalValue *Callee, unsigned ParamNo) const { auto IT = Functions.find(Callee); // Unknown callee (outside of LTO domain or an indirect call). if (IT == Functions.end()) return UnknownRange; const StackSafetyInfo::FunctionInfo &FS = IT->second; // The definition of this symbol may not be the definition in this linkage // unit. if (!FS.IsDSOLocal() || FS.IsInterposable()) return UnknownRange; if (ParamNo >= FS.Params.size()) // possibly vararg return UnknownRange; return FS.Params[ParamNo].Use.Range; } bool StackSafetyDataFlowAnalysis::updateOneUse(UseInfo &US, bool UpdateToFullSet) { bool Changed = false; for (auto &CS : US.Calls) { assert(!CS.Offset.isEmptySet() && "Param range can't be empty-set, invalid offset range"); ConstantRange CalleeRange = getArgumentAccessRange(CS.Callee, CS.ParamNo); CalleeRange = CalleeRange.add(CS.Offset); if (!US.Range.contains(CalleeRange)) { Changed = true; if (UpdateToFullSet) US.Range = UnknownRange; else US.Range = US.Range.unionWith(CalleeRange); } } return Changed; } void StackSafetyDataFlowAnalysis::updateOneNode( const GlobalValue *Callee, StackSafetyInfo::FunctionInfo &FS) { bool UpdateToFullSet = FS.UpdateCount > StackSafetyMaxIterations; bool Changed = false; for (auto &AS : FS.Allocas) Changed |= updateOneUse(AS.Use, UpdateToFullSet); for (auto &PS : FS.Params) Changed |= updateOneUse(PS.Use, UpdateToFullSet); if (Changed) { LLVM_DEBUG(dbgs() << "=== update [" << FS.UpdateCount << (UpdateToFullSet ? ", full-set" : "") << "] " << FS.getName() << "\n"); // Callers of this function may need updating. for (auto &CallerID : Callers[Callee]) WorkList.insert(CallerID); ++FS.UpdateCount; } } void StackSafetyDataFlowAnalysis::runDataFlow() { Callers.clear(); WorkList.clear(); SmallVector Callees; for (auto &F : Functions) { Callees.clear(); StackSafetyInfo::FunctionInfo &FS = F.second; for (auto &AS : FS.Allocas) for (auto &CS : AS.Use.Calls) Callees.push_back(CS.Callee); for (auto &PS : FS.Params) for (auto &CS : PS.Use.Calls) Callees.push_back(CS.Callee); llvm::sort(Callees); Callees.erase(std::unique(Callees.begin(), Callees.end()), Callees.end()); for (auto &Callee : Callees) Callers[Callee].push_back(F.first); } updateAllNodes(); while (!WorkList.empty()) { const GlobalValue *Callee = WorkList.back(); WorkList.pop_back(); updateOneNode(Callee); } } #ifndef NDEBUG void StackSafetyDataFlowAnalysis::verifyFixedPoint() { WorkList.clear(); updateAllNodes(); assert(WorkList.empty()); } #endif StackSafetyGlobalInfo StackSafetyDataFlowAnalysis::run() { runDataFlow(); LLVM_DEBUG(verifyFixedPoint()); StackSafetyGlobalInfo SSI; for (auto &F : Functions) SSI.emplace(F.first, std::move(F.second)); return SSI; } void print(const StackSafetyGlobalInfo &SSI, raw_ostream &O, const Module &M) { size_t Count = 0; for (auto &F : M.functions()) if (!F.isDeclaration()) { SSI.find(&F)->second.print(O); O << "\n"; ++Count; } for (auto &A : M.aliases()) { SSI.find(&A)->second.print(O); O << "\n"; ++Count; } assert(Count == SSI.size() && "Unexpected functions in the result"); } } // end anonymous namespace StackSafetyInfo::StackSafetyInfo() = default; StackSafetyInfo::StackSafetyInfo(StackSafetyInfo &&) = default; StackSafetyInfo &StackSafetyInfo::operator=(StackSafetyInfo &&) = default; StackSafetyInfo::StackSafetyInfo(FunctionInfo &&Info) : Info(new FunctionInfo(std::move(Info))) {} StackSafetyInfo::~StackSafetyInfo() = default; void StackSafetyInfo::print(raw_ostream &O) const { Info->print(O); } AnalysisKey StackSafetyAnalysis::Key; StackSafetyInfo StackSafetyAnalysis::run(Function &F, FunctionAnalysisManager &AM) { StackSafetyLocalAnalysis SSLA(F, AM.getResult(F)); return SSLA.run(); } PreservedAnalyses StackSafetyPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { OS << "'Stack Safety Local Analysis' for function '" << F.getName() << "'\n"; AM.getResult(F).print(OS); return PreservedAnalyses::all(); } char StackSafetyInfoWrapperPass::ID = 0; StackSafetyInfoWrapperPass::StackSafetyInfoWrapperPass() : FunctionPass(ID) { initializeStackSafetyInfoWrapperPassPass(*PassRegistry::getPassRegistry()); } void StackSafetyInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.setPreservesAll(); } void StackSafetyInfoWrapperPass::print(raw_ostream &O, const Module *M) const { SSI.print(O); } bool StackSafetyInfoWrapperPass::runOnFunction(Function &F) { StackSafetyLocalAnalysis SSLA( F, getAnalysis().getSE()); SSI = StackSafetyInfo(SSLA.run()); return false; } AnalysisKey StackSafetyGlobalAnalysis::Key; StackSafetyGlobalInfo StackSafetyGlobalAnalysis::run(Module &M, ModuleAnalysisManager &AM) { FunctionAnalysisManager &FAM = AM.getResult(M).getManager(); StackSafetyDataFlowAnalysis SSDFA( M, [&FAM](Function &F) -> const StackSafetyInfo & { return FAM.getResult(F); }); return SSDFA.run(); } PreservedAnalyses StackSafetyGlobalPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { OS << "'Stack Safety Analysis' for module '" << M.getName() << "'\n"; print(AM.getResult(M), OS, M); return PreservedAnalyses::all(); } char StackSafetyGlobalInfoWrapperPass::ID = 0; StackSafetyGlobalInfoWrapperPass::StackSafetyGlobalInfoWrapperPass() : ModulePass(ID) { initializeStackSafetyGlobalInfoWrapperPassPass( *PassRegistry::getPassRegistry()); } void StackSafetyGlobalInfoWrapperPass::print(raw_ostream &O, const Module *M) const { ::print(SSI, O, *M); } void StackSafetyGlobalInfoWrapperPass::getAnalysisUsage( AnalysisUsage &AU) const { AU.addRequired(); } bool StackSafetyGlobalInfoWrapperPass::runOnModule(Module &M) { StackSafetyDataFlowAnalysis SSDFA( M, [this](Function &F) -> const StackSafetyInfo & { return getAnalysis(F).getResult(); }); SSI = SSDFA.run(); return false; } static const char LocalPassArg[] = "stack-safety-local"; static const char LocalPassName[] = "Stack Safety Local Analysis"; INITIALIZE_PASS_BEGIN(StackSafetyInfoWrapperPass, LocalPassArg, LocalPassName, false, true) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(StackSafetyInfoWrapperPass, LocalPassArg, LocalPassName, false, true) static const char GlobalPassName[] = "Stack Safety Analysis"; INITIALIZE_PASS_BEGIN(StackSafetyGlobalInfoWrapperPass, DEBUG_TYPE, GlobalPassName, false, false) INITIALIZE_PASS_DEPENDENCY(StackSafetyInfoWrapperPass) INITIALIZE_PASS_END(StackSafetyGlobalInfoWrapperPass, DEBUG_TYPE, GlobalPassName, false, false)