//===-- EfficiencySanitizer.cpp - performance tuner -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of EfficiencySanitizer, a family of performance tuners
// that detects multiple performance issues via separate sub-tools.
//
// The instrumentation phase is straightforward:
//   - Take action on every memory access: either inlined instrumentation,
//     or inserted calls to our run-time library.
//   - Optimizations may apply to avoid instrumenting some of the accesses.
//   - Turn mem{set,cpy,move} intrinsics into library calls.
// The rest is handled by the run-time library.
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

using namespace llvm;

#define DEBUG_TYPE "esan"
// The tool type must be just one of these ClTool* options, as the tools
// cannot be combined due to shadow memory constraints.
static cl::opt<bool>
    ClToolCacheFrag("esan-cache-frag", cl::init(false),
                    cl::desc("Detect data cache fragmentation"), cl::Hidden);
static cl::opt<bool>
    ClToolWorkingSet("esan-working-set", cl::init(false),
                     cl::desc("Measure the working set size"), cl::Hidden);
// Each new tool will get its own opt flag here.
// These are converted to EfficiencySanitizerOptions for use
// in the code.
static cl::opt<bool> ClInstrumentLoadsAndStores(
    "esan-instrument-loads-and-stores", cl::init(true),
    cl::desc("Instrument loads and stores"), cl::Hidden);
static cl::opt<bool> ClInstrumentMemIntrinsics(
    "esan-instrument-memintrinsics", cl::init(true),
    cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
static cl::opt<bool> ClInstrumentFastpath(
    "esan-instrument-fastpath", cl::init(true),
    cl::desc("Instrument fastpath"), cl::Hidden);
static cl::opt<bool> ClAuxFieldInfo(
    "esan-aux-field-info", cl::init(true),
    cl::desc("Generate binary with auxiliary struct field information"),
    cl::Hidden);
// Experiments show that the performance difference can be 2x or more,
// and accuracy loss is typically negligible, so we turn this on by default.
static cl::opt<bool> ClAssumeIntraCacheLine(
    "esan-assume-intra-cache-line", cl::init(true),
    cl::desc("Assume each memory access touches just one cache line, for "
             "better performance but with a potential loss of accuracy."),
    cl::Hidden);
STATISTIC(NumInstrumentedLoads, "Number of instrumented loads");
STATISTIC(NumInstrumentedStores, "Number of instrumented stores");
STATISTIC(NumFastpaths, "Number of instrumented fastpaths");
STATISTIC(NumAccessesWithIrregularSize,
          "Number of accesses with a size outside our targeted callout sizes");
STATISTIC(NumIgnoredStructs, "Number of ignored structs");
STATISTIC(NumIgnoredGEPs, "Number of ignored GEP instructions");
STATISTIC(NumInstrumentedGEPs, "Number of instrumented GEP instructions");
STATISTIC(NumAssumedIntraCacheLine,
          "Number of accesses assumed to be intra-cache-line");
static const uint64_t EsanCtorAndDtorPriority = 0;
static const char *const EsanModuleCtorName = "esan.module_ctor";
static const char *const EsanModuleDtorName = "esan.module_dtor";
static const char *const EsanInitName = "__esan_init";
static const char *const EsanExitName = "__esan_exit";

// We need to specify the tool to the runtime earlier than
// the ctor is called in some cases, so we set a global variable.
static const char *const EsanWhichToolName = "__esan_which_tool";
// We must keep these Shadow* constants consistent with the esan runtime.
// FIXME: Try to place these shadow constants, the names of the __esan_*
// interface functions, and the ToolType enum into a header shared between
// llvm and compiler-rt.
struct ShadowMemoryParams {
  uint64_t ShadowMask;
  uint64_t ShadowOffs[3];
};

static const ShadowMemoryParams ShadowParams47 = {
    0x00000fffffffffffull,
    {
        0x0000130000000000ull, 0x0000220000000000ull, 0x0000440000000000ull,
    }};

static const ShadowMemoryParams ShadowParams40 = {
    0x0000000fffffffffull,
    {
        0x1300000000ull, 0x2200000000ull, 0x4400000000ull,
    }};
// This array is indexed by the ToolType enum.
static const int ShadowScale[] = {
    0, // ESAN_None.
    2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
    6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
};
// MaxStructCounterNameSize is a soft size limit to avoid insanely long
// names for those extremely large structs.
static const unsigned MaxStructCounterNameSize = 512;

namespace {
static EfficiencySanitizerOptions
OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
  if (ClToolCacheFrag)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
  else if (ClToolWorkingSet)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet;

  // Direct opt invocation with no params will have the default ESAN_None.
  // We run the default tool in that case.
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_None)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;

  return Options;
}

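// For example (illustrative opt invocations): `opt -esan -esan-working-set`
// selects ESAN_WorkingSet, while a plain `opt -esan` leaves the default
// ESAN_None and thus falls back to the cache-fragmentation tool.
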
// Create a constant for Str so that we can pass it to the run-time lib.
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
                                                    bool AllowMerging) {
  Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
  // We use private linkage for module-local strings. If they can be merged
  // with another one, we set the unnamed_addr attribute.
  GlobalVariable *GV =
      new GlobalVariable(M, StrConst->getType(), true,
                         GlobalValue::PrivateLinkage, StrConst, "");
  if (AllowMerging)
    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
  return GV;
}
/// EfficiencySanitizer: instrument each module to find performance issues.
class EfficiencySanitizer : public ModulePass {
public:
  EfficiencySanitizer(
      const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions())
      : ModulePass(ID), Options(OverrideOptionsFromCL(Opts)) {}
  StringRef getPassName() const override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnModule(Module &M) override;
  static char ID;

private:
  bool initOnModule(Module &M);
  void initializeCallbacks(Module &M);
  bool shouldIgnoreStructType(StructType *StructTy);
  void createStructCounterName(
      StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr);
  void createCacheFragAuxGV(
      Module &M, const DataLayout &DL, StructType *StructTy,
      GlobalVariable *&TypeNames, GlobalVariable *&Offsets, GlobalVariable *&Size);
  GlobalVariable *createCacheFragInfoGV(Module &M, const DataLayout &DL,
                                        Constant *UnitName);
  Constant *createEsanInitToolInfoArg(Module &M, const DataLayout &DL);
  void createDestructor(Module &M, Constant *ToolInfoArg);
  bool runOnFunction(Function &F, Module &M);
  bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
  bool instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentGetElementPtr(Instruction *I, Module &M);
  bool insertCounterUpdate(Instruction *I, StructType *StructTy,
                           unsigned CounterIdx);
  unsigned getFieldCounterIdx(StructType *StructTy) {
    return 0;
  }
  unsigned getArrayCounterIdx(StructType *StructTy) {
    return StructTy->getNumElements();
  }
  unsigned getStructCounterSize(StructType *StructTy) {
    // The struct counter array includes:
    // - one counter for each struct field,
    // - one counter for the struct access within an array.
    return (StructTy->getNumElements()/*field*/ + 1/*array*/);
  }
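  // Layout sketch: for a struct with 3 fields, Counters[0..2] count accesses
  // to fields 0..2 and Counters[3] (== getArrayCounterIdx) counts accesses
  // to the struct as an array element, 4 u64 counters in total.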
  bool shouldIgnoreMemoryAccess(Instruction *I);
  int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
  Value *appToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore,
                          Value *Addr, unsigned Alignment);
  // Each tool has its own fastpath routine:
  bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL,
                                   Value *Addr, unsigned Alignment);
  bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL,
                                    Value *Addr, unsigned Alignment);

  EfficiencySanitizerOptions Options;
  LLVMContext *Ctx;
  Type *IntptrTy;
  // Our slowpath involves callouts to the runtime library.
  // Access sizes are powers of two: 1, 2, 4, 8, 16.
  static const size_t NumberOfAccessSizes = 5;
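  // Index mapping (see initializeCallbacks below): slot Idx holds the callout
  // for accesses of 1<<Idx bytes, e.g., EsanAlignedLoad[3] is
  // __esan_aligned_load8.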
  Function *EsanAlignedLoad[NumberOfAccessSizes];
  Function *EsanAlignedStore[NumberOfAccessSizes];
  Function *EsanUnalignedLoad[NumberOfAccessSizes];
  Function *EsanUnalignedStore[NumberOfAccessSizes];
  // For irregular sizes of any alignment:
  Function *EsanUnalignedLoadN, *EsanUnalignedStoreN;
  Function *MemmoveFn, *MemcpyFn, *MemsetFn;
  Function *EsanCtorFunction;
  Function *EsanDtorFunction;
  // Remember the counter variable for each struct type to avoid
  // recomputing the variable name later during instrumentation.
  std::map<Type *, GlobalVariable *> StructTyMap;
  ShadowMemoryParams ShadowParams;
};
} // namespace

char EfficiencySanitizer::ID = 0;
INITIALIZE_PASS_BEGIN(
    EfficiencySanitizer, "esan",
    "EfficiencySanitizer: finds performance issues.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    EfficiencySanitizer, "esan",
    "EfficiencySanitizer: finds performance issues.", false, false)

StringRef EfficiencySanitizer::getPassName() const {
  return "EfficiencySanitizer";
}

void EfficiencySanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetLibraryInfoWrapperPass>();
}

ModulePass *
llvm::createEfficiencySanitizerPass(const EfficiencySanitizerOptions &Options) {
  return new EfficiencySanitizer(Options);
}

void EfficiencySanitizer::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(M.getContext());
  // Initialize the callbacks.
  for (size_t Idx = 0; Idx < NumberOfAccessSizes; ++Idx) {
    const unsigned ByteSize = 1U << Idx;
    std::string ByteSizeStr = utostr(ByteSize);
    // We'll inline the most common (i.e., aligned and frequent sizes)
    // load + store instrumentation: these callouts are for the slowpath.
    SmallString<32> AlignedLoadName("__esan_aligned_load" + ByteSizeStr);
    EsanAlignedLoad[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
    SmallString<32> AlignedStoreName("__esan_aligned_store" + ByteSizeStr);
    EsanAlignedStore[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
    SmallString<32> UnalignedLoadName("__esan_unaligned_load" + ByteSizeStr);
    EsanUnalignedLoad[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
    SmallString<32> UnalignedStoreName("__esan_unaligned_store" + ByteSizeStr);
    EsanUnalignedStore[Idx] =
        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
            UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
  }
  EsanUnalignedLoadN = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("__esan_unaligned_loadN", IRB.getVoidTy(),
                            IRB.getInt8PtrTy(), IntptrTy));
  EsanUnalignedStoreN = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("__esan_unaligned_storeN", IRB.getVoidTy(),
                            IRB.getInt8PtrTy(), IntptrTy));
  MemmoveFn = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt8PtrTy(), IntptrTy));
  MemcpyFn = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt8PtrTy(), IntptrTy));
  MemsetFn = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt32Ty(), IntptrTy));
}

bool EfficiencySanitizer::shouldIgnoreStructType(StructType *StructTy) {
  if (StructTy == nullptr || StructTy->isOpaque() /* no struct body */)
    return true;
  return false;
}

void EfficiencySanitizer::createStructCounterName(
    StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr) {
  // Append NumFields and field type ids to avoid struct conflicts
  // with the same name but different fields.
  if (StructTy->hasName())
    NameStr += StructTy->getName();
  else
    NameStr += "struct.anon";
  // We allow the actual size of the StructCounterName to be larger than
  // MaxStructCounterNameSize and append $NumFields and at least one
  // field type id.
  // Append $NumFields.
  NameStr += "$";
  Twine(StructTy->getNumElements()).toVector(NameStr);
  // Append struct field type ids in the reverse order.
  for (int i = StructTy->getNumElements() - 1; i >= 0; --i) {
    NameStr += "$";
    Twine(StructTy->getElementType(i)->getTypeID()).toVector(NameStr);
    if (NameStr.size() >= MaxStructCounterNameSize)
      break;
  }
  if (StructTy->isLiteral()) {
    // End with $ for literal struct.
    NameStr += "$";
  }
}

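// An illustrative result: a named struct %struct.Foo with two integer fields
// yields a counter name like "struct.Foo$2$11$11" (the numeric type ids are
// Type::getTypeID() values and may vary across LLVM versions), with a
// trailing '$' appended for literal structs.
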
// Create global variables with auxiliary information (e.g., struct field size,
// offset, and type name) for better user report.
void EfficiencySanitizer::createCacheFragAuxGV(
    Module &M, const DataLayout &DL, StructType *StructTy,
    GlobalVariable *&TypeName, GlobalVariable *&Offset,
    GlobalVariable *&Size) {
  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  auto *Int32Ty = Type::getInt32Ty(*Ctx);

  auto *TypeNameArrayTy = ArrayType::get(Int8PtrTy, StructTy->getNumElements());
  TypeName = new GlobalVariable(M, TypeNameArrayTy, true,
                                GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> TypeNameVec;

  auto *OffsetArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
  Offset = new GlobalVariable(M, OffsetArrayTy, true,
                              GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> OffsetVec;

  auto *SizeArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
  Size = new GlobalVariable(M, SizeArrayTy, true,
                            GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> SizeVec;
  for (unsigned i = 0; i < StructTy->getNumElements(); ++i) {
    Type *Ty = StructTy->getElementType(i);
    std::string Str;
    raw_string_ostream StrOS(Str);
    Ty->print(StrOS);
    TypeNameVec.push_back(
        ConstantExpr::getPointerCast(
            createPrivateGlobalForString(M, StrOS.str(), true),
            Int8PtrTy));
    OffsetVec.push_back(
        ConstantInt::get(Int32Ty,
                         DL.getStructLayout(StructTy)->getElementOffset(i)));
    SizeVec.push_back(ConstantInt::get(Int32Ty,
                                       DL.getTypeAllocSize(Ty)));
  }
  TypeName->setInitializer(ConstantArray::get(TypeNameArrayTy, TypeNameVec));
  Offset->setInitializer(ConstantArray::get(OffsetArrayTy, OffsetVec));
  Size->setInitializer(ConstantArray::get(SizeArrayTy, SizeVec));
}

// Create the global variable for the cache-fragmentation tool.
GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV(
    Module &M, const DataLayout &DL, Constant *UnitName) {
  assert(Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag);

  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  auto *Int8PtrPtrTy = Int8PtrTy->getPointerTo();
  auto *Int32Ty = Type::getInt32Ty(*Ctx);
  auto *Int32PtrTy = Type::getInt32PtrTy(*Ctx);
  auto *Int64Ty = Type::getInt64Ty(*Ctx);
  auto *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
  // This structure should be kept consistent with the StructInfo struct
  // in the runtime library.
  // struct StructInfo {
  //   const char *StructName;
  //   u32 Size;
  //   u32 NumFields;
  //   u32 *FieldOffset;           // auxiliary struct field info.
  //   u32 *FieldSize;             // auxiliary struct field info.
  //   const char **FieldTypeName; // auxiliary struct field info.
  //   u64 *FieldCounters;
  //   u64 *ArrayCounter;
  // };
  auto *StructInfoTy =
      StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy,
                      Int8PtrPtrTy, Int64PtrTy, Int64PtrTy);
  auto *StructInfoPtrTy = StructInfoTy->getPointerTo();
  // This structure should be kept consistent with the CacheFragInfo struct
  // in the runtime library.
  // struct CacheFragInfo {
  //   const char *UnitName;
  //   u32 NumStructs;
  //   StructInfo *Structs;
  // };
  auto *CacheFragInfoTy = StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy);
  std::vector<StructType *> Vec = M.getIdentifiedStructTypes();
  unsigned NumStructs = 0;
  SmallVector<Constant *, 16> Initializers;

  for (auto &StructTy : Vec) {
    if (shouldIgnoreStructType(StructTy)) {
      ++NumIgnoredStructs;
      continue;
    }
    ++NumStructs;

    SmallString<MaxStructCounterNameSize> CounterNameStr;
    createStructCounterName(StructTy, CounterNameStr);
    GlobalVariable *StructCounterName = createPrivateGlobalForString(
        M, CounterNameStr, /*AllowMerging*/true);

    // We create the counter array with StructCounterName and weak linkage
    // so that the structs with the same name and layout from different
    // compilation units will be merged into one.
    auto *CounterArrayTy = ArrayType::get(Int64Ty,
                                          getStructCounterSize(StructTy));
    GlobalVariable *Counters =
        new GlobalVariable(M, CounterArrayTy, false,
                           GlobalVariable::WeakAnyLinkage,
                           ConstantAggregateZero::get(CounterArrayTy),
                           CounterNameStr);

    // Remember the counter variable for each struct type.
    StructTyMap.insert(std::pair<Type *, GlobalVariable *>(StructTy, Counters));

    // We pass the field type name array, offset array, and size array to
    // the runtime for better reporting.
    GlobalVariable *TypeName = nullptr, *Offset = nullptr, *Size = nullptr;
    if (ClAuxFieldInfo)
      createCacheFragAuxGV(M, DL, StructTy, TypeName, Offset, Size);

    Constant *FieldCounterIdx[2];
    FieldCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
    FieldCounterIdx[1] = ConstantInt::get(Int32Ty,
                                          getFieldCounterIdx(StructTy));
    Constant *ArrayCounterIdx[2];
    ArrayCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
    ArrayCounterIdx[1] = ConstantInt::get(Int32Ty,
                                          getArrayCounterIdx(StructTy));
    Initializers.push_back(ConstantStruct::get(
        StructInfoTy,
        ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy),
        ConstantInt::get(Int32Ty,
                         DL.getStructLayout(StructTy)->getSizeInBytes()),
        ConstantInt::get(Int32Ty, StructTy->getNumElements()),
        Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy)
                          : ConstantExpr::getPointerCast(Offset, Int32PtrTy),
        Size == nullptr ? ConstantPointerNull::get(Int32PtrTy)
                        : ConstantExpr::getPointerCast(Size, Int32PtrTy),
        TypeName == nullptr
            ? ConstantPointerNull::get(Int8PtrPtrTy)
            : ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy),
        ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
                                       FieldCounterIdx),
        ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
                                       ArrayCounterIdx)));
  }

  Constant *StructInfo;
  if (NumStructs == 0) {
    StructInfo = ConstantPointerNull::get(StructInfoPtrTy);
  } else {
    auto *StructInfoArrayTy = ArrayType::get(StructInfoTy, NumStructs);
    StructInfo = ConstantExpr::getPointerCast(
        new GlobalVariable(M, StructInfoArrayTy, false,
                           GlobalVariable::InternalLinkage,
                           ConstantArray::get(StructInfoArrayTy, Initializers)),
        StructInfoPtrTy);
  }

  auto *CacheFragInfoGV = new GlobalVariable(
      M, CacheFragInfoTy, true, GlobalVariable::InternalLinkage,
      ConstantStruct::get(CacheFragInfoTy, UnitName,
                          ConstantInt::get(Int32Ty, NumStructs), StructInfo));
  return CacheFragInfoGV;
}

// Create the tool-specific argument passed to EsanInit and EsanExit.
Constant *EfficiencySanitizer::createEsanInitToolInfoArg(Module &M,
                                                         const DataLayout &DL) {
  // This structure contains tool-specific information about each compilation
  // unit (module) and is passed to the runtime library.
  GlobalVariable *ToolInfoGV = nullptr;

  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  // Compilation unit name.
  auto *UnitName = ConstantExpr::getPointerCast(
      createPrivateGlobalForString(M, M.getModuleIdentifier(), true),
      Int8PtrTy);

  // Create the tool-specific variable.
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag)
    ToolInfoGV = createCacheFragInfoGV(M, DL, UnitName);

  if (ToolInfoGV != nullptr)
    return ConstantExpr::getPointerCast(ToolInfoGV, Int8PtrTy);

  // Create the null pointer if no tool-specific variable created.
  return ConstantPointerNull::get(Int8PtrTy);
}

void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) {
  PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  EsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*Ctx),
                                                        false),
                                      GlobalValue::InternalLinkage,
                                      EsanModuleDtorName, &M);
  ReturnInst::Create(*Ctx, BasicBlock::Create(*Ctx, "", EsanDtorFunction));
  IRBuilder<> IRB_Dtor(EsanDtorFunction->getEntryBlock().getTerminator());
  Function *EsanExit = checkSanitizerInterfaceFunction(
      M.getOrInsertFunction(EsanExitName, IRB_Dtor.getVoidTy(),
                            Int8PtrTy));
  EsanExit->setLinkage(Function::ExternalLinkage);
  IRB_Dtor.CreateCall(EsanExit, {ToolInfoArg});
  appendToGlobalDtors(M, EsanDtorFunction, EsanCtorAndDtorPriority);
}

bool EfficiencySanitizer::initOnModule(Module &M) {

  Triple TargetTriple(M.getTargetTriple());
  if (TargetTriple.getArch() == Triple::mips64 ||
      TargetTriple.getArch() == Triple::mips64el)
    ShadowParams = ShadowParams40;
  else
    ShadowParams = ShadowParams47;

  Ctx = &M.getContext();
  const DataLayout &DL = M.getDataLayout();
  IRBuilder<> IRB(M.getContext());
  IntegerType *OrdTy = IRB.getInt32Ty();
  PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  IntptrTy = DL.getIntPtrType(M.getContext());
  // Create the variable passed to EsanInit and EsanExit.
  Constant *ToolInfoArg = createEsanInitToolInfoArg(M, DL);

  // We specify the tool type both in the EsanWhichToolName global
  // and as an arg to the init routine as a sanity check.
  std::tie(EsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
      M, EsanModuleCtorName, EsanInitName, /*InitArgTypes=*/{OrdTy, Int8PtrTy},
      /*InitArgs=*/{
          ConstantInt::get(OrdTy, static_cast<int>(Options.ToolType)),
          ToolInfoArg});
  appendToGlobalCtors(M, EsanCtorFunction, EsanCtorAndDtorPriority);

  createDestructor(M, ToolInfoArg);

  new GlobalVariable(M, OrdTy, true,
                     GlobalValue::WeakAnyLinkage,
                     ConstantInt::get(OrdTy,
                                      static_cast<int>(Options.ToolType)),
                     EsanWhichToolName);

  return true;
}

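// A sketch of the module after initOnModule:
//   esan.module_ctor -> __esan_init(ToolType, ToolInfo)  via llvm.global_ctors
//   esan.module_dtor -> __esan_exit(ToolInfo)            via llvm.global_dtors
//   __esan_which_tool = ToolType (weak), readable by the runtime before the
//   ctor fires.
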
Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // Shadow = ((App & Mask) + Offs) >> Scale
  Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowParams.ShadowMask));
  uint64_t Offs;
  int Scale = ShadowScale[Options.ToolType];
  if (Scale <= 2)
    Offs = ShadowParams.ShadowOffs[Scale];
  else
    Offs = ShadowParams.ShadowOffs[0] << Scale;
  Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs));
  if (Scale > 0)
    Shadow = IRB.CreateLShr(Shadow, Scale);
  return Shadow;
}

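// Illustrative arithmetic for the working-set tool (Scale == 6) under
// ShadowParams47: App == 0x40 maps to
//   ((0x40 & 0x00000fffffffffff) + (0x0000130000000000 << 6)) >> 6
//     == 0x0000130000000001,
// i.e., one shadow byte per 64-byte cache line.
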
bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) {
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    // We'd like to know about cache fragmentation in vtable accesses and
    // constant data references, so we do not currently ignore anything.
    return false;
  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    // TODO: the instrumentation disturbs the data layout on the stack, so we
    // may want to add an option to ignore stack references (if we can
    // distinguish them) to reduce overhead.
    return false;
  }
  // TODO(bruening): future tools will be returning true for some cases.
  return false;
}

bool EfficiencySanitizer::runOnModule(Module &M) {
  bool Res = initOnModule(M);
  initializeCallbacks(M);
  for (auto &F : M) {
    Res |= runOnFunction(F, M);
  }
  return Res;
}

bool EfficiencySanitizer::runOnFunction(Function &F, Module &M) {
  // This is required to prevent instrumenting the call to __esan_init from
  // within the module constructor.
  if (&F == EsanCtorFunction)
    return false;
  SmallVector<Instruction *, 8> LoadsAndStores;
  SmallVector<Instruction *, 8> MemIntrinCalls;
  SmallVector<Instruction *, 8> GetElementPtrs;
  bool Res = false;
  const DataLayout &DL = M.getDataLayout();
  const TargetLibraryInfo *TLI =
      &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if ((isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
           isa<AtomicRMWInst>(Inst) || isa<AtomicCmpXchgInst>(Inst)) &&
          !shouldIgnoreMemoryAccess(&Inst))
        LoadsAndStores.push_back(&Inst);
      else if (isa<MemIntrinsic>(Inst))
        MemIntrinCalls.push_back(&Inst);
      else if (isa<GetElementPtrInst>(Inst))
        GetElementPtrs.push_back(&Inst);
      else if (CallInst *CI = dyn_cast<CallInst>(&Inst))
        maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
    }
  }

  if (ClInstrumentLoadsAndStores) {
    for (auto Inst : LoadsAndStores) {
      Res |= instrumentLoadOrStore(Inst, DL);
    }
  }

  if (ClInstrumentMemIntrinsics) {
    for (auto Inst : MemIntrinCalls) {
      Res |= instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
    }
  }

  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    for (auto Inst : GetElementPtrs) {
      Res |= instrumentGetElementPtr(Inst, M);
    }
  }

  return Res;
}

bool EfficiencySanitizer::instrumentLoadOrStore(Instruction *I,
                                                const DataLayout &DL) {
  IRBuilder<> IRB(I);
  bool IsStore;
  Value *Addr;
  unsigned Alignment;
  if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
    IsStore = false;
    Alignment = Load->getAlignment();
    Addr = Load->getPointerOperand();
  } else if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
    IsStore = true;
    Alignment = Store->getAlignment();
    Addr = Store->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    IsStore = true;
    Alignment = 0;
    Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *Xchg = dyn_cast<AtomicCmpXchgInst>(I)) {
    IsStore = true;
    Alignment = 0;
    Addr = Xchg->getPointerOperand();
  } else
    llvm_unreachable("Unsupported mem access type");

  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
  Value *OnAccessFunc = nullptr;

  // Convert 0 to the default alignment.
  if (Alignment == 0)
    Alignment = DL.getPrefTypeAlignment(OrigTy);

  if (IsStore)
    NumInstrumentedStores++;
  else
    NumInstrumentedLoads++;
  int Idx = getMemoryAccessFuncIndex(Addr, DL);
  if (Idx < 0) {
    OnAccessFunc = IsStore ? EsanUnalignedStoreN : EsanUnalignedLoadN;
    IRB.CreateCall(OnAccessFunc,
                   {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                    ConstantInt::get(IntptrTy, TypeSizeBytes)});
  } else {
    if (ClInstrumentFastpath &&
        instrumentFastpath(I, DL, IsStore, Addr, Alignment)) {
      NumFastpaths++;
      return true;
    }
    if (Alignment == 0 || (Alignment % TypeSizeBytes) == 0)
      OnAccessFunc = IsStore ? EsanAlignedStore[Idx] : EsanAlignedLoad[Idx];
    else
      OnAccessFunc = IsStore ? EsanUnalignedStore[Idx] : EsanUnalignedLoad[Idx];
    IRB.CreateCall(OnAccessFunc,
                   IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
  }
  return true;
}

// It's simplest to replace the memset/memmove/memcpy intrinsics with
// calls that the runtime library intercepts.
// Our pass is late enough that calls should not turn back into intrinsics.
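// For example (illustrative IR), a
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, ...)
// becomes a plain
//   call i8* @memcpy(i8* %dst, i8* %src, i64 %n)
// which the runtime's interceptor then observes.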
bool EfficiencySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  bool Res = false;
  if (isa<MemSetInst>(MI)) {
    IRB.CreateCall(
        MemsetFn,
        {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(MI->getArgOperand(1), IRB.getInt32Ty(), false),
         IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
    MI->eraseFromParent();
    Res = true;
  } else if (isa<MemTransferInst>(MI)) {
    IRB.CreateCall(
        isa<MemCpyInst>(MI) ? MemcpyFn : MemmoveFn,
        {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreatePointerCast(MI->getArgOperand(1), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
    MI->eraseFromParent();
    Res = true;
  } else
    llvm_unreachable("Unsupported mem intrinsic type");
  return Res;
}

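// An illustrative case for the GEP instrumentation below: with %struct.S
// having three fields,
//   getelementptr %struct.S, %struct.S* %p, i64 1, i32 2
// updates S's array counter (the first index is non-zero) and the counter
// for field 2.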
bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) {
  GetElementPtrInst *GepInst = dyn_cast<GetElementPtrInst>(I);
  bool Res = false;
  if (GepInst == nullptr || GepInst->getNumIndices() == 1) {
    ++NumIgnoredGEPs;
    return false;
  }
  Type *SourceTy = GepInst->getSourceElementType();
  StructType *StructTy = nullptr;
  ConstantInt *Idx;
  // Check if GEP calculates address from a struct array.
  if (isa<StructType>(SourceTy)) {
    StructTy = cast<StructType>(SourceTy);
    Idx = dyn_cast<ConstantInt>(GepInst->getOperand(1));
    if ((Idx == nullptr || Idx->getSExtValue() != 0) &&
        !shouldIgnoreStructType(StructTy) && StructTyMap.count(StructTy) != 0)
      Res |= insertCounterUpdate(I, StructTy, getArrayCounterIdx(StructTy));
  }
  // Iterate all (except the first and the last) idx within each GEP instruction
  // for possible nested struct field address calculation.
  for (unsigned i = 1; i < GepInst->getNumIndices(); ++i) {
    SmallVector<Value *, 8> IdxVec(GepInst->idx_begin(),
                                   GepInst->idx_begin() + i);
    Type *Ty = GetElementPtrInst::getIndexedType(SourceTy, IdxVec);
    unsigned CounterIdx = 0;
    if (isa<ArrayType>(Ty)) {
      ArrayType *ArrayTy = cast<ArrayType>(Ty);
      StructTy = dyn_cast<StructType>(ArrayTy->getElementType());
      if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
        continue;
      // The last counter for struct array access.
      CounterIdx = getArrayCounterIdx(StructTy);
    } else if (isa<StructType>(Ty)) {
      StructTy = cast<StructType>(Ty);
      if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
        continue;
      // Get the StructTy's subfield index.
      Idx = cast<ConstantInt>(GepInst->getOperand(i+1));
      assert(Idx->getSExtValue() >= 0 &&
             Idx->getSExtValue() < StructTy->getNumElements());
      CounterIdx = getFieldCounterIdx(StructTy) + Idx->getSExtValue();
    }
    Res |= insertCounterUpdate(I, StructTy, CounterIdx);
  }
  if (Res)
    ++NumInstrumentedGEPs;
  else
    ++NumIgnoredGEPs;
  return Res;
}

bool EfficiencySanitizer::insertCounterUpdate(Instruction *I,
                                              StructType *StructTy,
                                              unsigned CounterIdx) {
  GlobalVariable *CounterArray = StructTyMap[StructTy];
  if (CounterArray == nullptr)
    return false;
  IRBuilder<> IRB(I);
  Constant *Indices[2];
  // Xref http://llvm.org/docs/LangRef.html#i-getelementptr and
  // http://llvm.org/docs/GetElementPtr.html.
  // The first index of the GEP instruction steps through the first operand,
  // i.e., the array itself.
  Indices[0] = ConstantInt::get(IRB.getInt32Ty(), 0);
  // The second index is the index within the array.
  Indices[1] = ConstantInt::get(IRB.getInt32Ty(), CounterIdx);
  Constant *Counter =
      ConstantExpr::getGetElementPtr(
          ArrayType::get(IRB.getInt64Ty(), getStructCounterSize(StructTy)),
          CounterArray, Indices);
  Value *Load = IRB.CreateLoad(Counter);
  IRB.CreateStore(IRB.CreateAdd(Load, ConstantInt::get(IRB.getInt64Ty(), 1)),
                  Counter);
  return true;
}

int EfficiencySanitizer::getMemoryAccessFuncIndex(Value *Addr,
                                                  const DataLayout &DL) {
  Type *OrigPtrTy = Addr->getType();
  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
  assert(OrigTy->isSized());
  // The size is always a multiple of 8.
  uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
  if (TypeSizeBytes != 1 && TypeSizeBytes != 2 && TypeSizeBytes != 4 &&
      TypeSizeBytes != 8 && TypeSizeBytes != 16) {
    // Irregular sizes do not have per-size call targets.
    NumAccessesWithIrregularSize++;
    return -1;
  }
  size_t Idx = countTrailingZeros(TypeSizeBytes);
  assert(Idx < NumberOfAccessSizes);
  return Idx;
}

bool EfficiencySanitizer::instrumentFastpath(Instruction *I,
                                             const DataLayout &DL, bool IsStore,
                                             Value *Addr, unsigned Alignment) {
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    return instrumentFastpathCacheFrag(I, DL, Addr, Alignment);
  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    return instrumentFastpathWorkingSet(I, DL, Addr, Alignment);
  }
  return false;
}

bool EfficiencySanitizer::instrumentFastpathCacheFrag(Instruction *I,
                                                      const DataLayout &DL,
                                                      Value *Addr,
                                                      unsigned Alignment) {
  // Do nothing.
  return true; // Return true to avoid slowpath instrumentation.
}

bool EfficiencySanitizer::instrumentFastpathWorkingSet(
    Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) {
  assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this.
  IRBuilder<> IRB(I);
  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
  // Bail to the slowpath if the access might touch multiple cache lines.
  // An access aligned to its size is guaranteed to be intra-cache-line.
  // getMemoryAccessFuncIndex has already ruled out a size larger than 16
  // and thus larger than a cache line for platforms this tool targets
  // (and our shadow memory setup assumes 64-byte cache lines).
  assert(TypeSize <= 128);
  if (!(TypeSize == 8 ||
        (Alignment % (TypeSize / 8)) == 0)) {
    if (ClAssumeIntraCacheLine)
      ++NumAssumedIntraCacheLine;
    else
      return false;
  }

  // We inline instrumentation to set the corresponding shadow bits for
  // each cache line touched by the application. Here we handle a single
  // load or store where we've already ruled out the possibility that it
  // might touch more than one cache line and thus we simply update the
  // shadow memory for a single cache line.
  // Our shadow memory model is fine with races when manipulating shadow values.
  // We generate the following code:
  //
  //   const char BitMask = 0x81;
  //   char *ShadowAddr = appToShadow(AppAddr);
  //   if ((*ShadowAddr & BitMask) != BitMask)
  //     *ShadowAddr |= BitMask;
  //
  Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy);
  Value *ShadowPtr = appToShadow(AddrPtr, IRB);
  Type *ShadowTy = IntegerType::get(*Ctx, 8U);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
  // The bottom bit is used for the current sampling period's working set.
  // The top bit is used for the total working set. We set both on each
  // memory access, if they are not already set.
  Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B

  Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  // The AND and CMP will be turned into a TEST instruction by the compiler.
  Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
  TerminatorInst *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
  // FIXME: do I need to call SetCurrentDebugLocation?
  IRB.SetInsertPoint(CmpTerm);
  // We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
  // which are used by the runtime library.
  Value *NewVal = IRB.CreateOr(OldValue, ValueMask);
  IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  IRB.SetInsertPoint(I);
  return true;
}