1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains an optimization for div and rem on architectures that
11 // execute short instructions significantly faster than longer instructions.
12 // For example, on Intel Atom 32-bit divides are slow enough that during
13 // runtime it is profitable to check the value of the operands, and if they are
14 // positive and less than 256 use an unsigned 8-bit divide.
16 //===----------------------------------------------------------------------===//
18 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/Instructions.h"
26 #define DEBUG_TYPE "bypass-slow-division"
34 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
35 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
42 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
43 : Quotient(InQuotient), Remainder(InRemainder) {}
49 struct DenseMapInfo<DivOpInfo> {
50 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
51 return Val1.SignedOp == Val2.SignedOp &&
52 Val1.Dividend == Val2.Dividend &&
53 Val1.Divisor == Val2.Divisor;
56 static DivOpInfo getEmptyKey() {
57 return DivOpInfo(false, nullptr, nullptr);
60 static DivOpInfo getTombstoneKey() {
61 return DivOpInfo(true, nullptr, nullptr);
64 static unsigned getHashValue(const DivOpInfo &Val) {
65 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
66 reinterpret_cast<uintptr_t>(Val.Divisor)) ^
67 (unsigned)Val.SignedOp;
71 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
74 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
75 // value of the operands and uses a shorter-faster div/rem instruction when
76 // possible and the longer-slower div/rem instruction otherwise.
77 static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
78 bool UseDivOp, bool UseSignedOp,
79 DivCacheTy &PerBBDivCache) {
80 Function *F = I->getParent()->getParent();
81 // Get instruction operands
82 Value *Dividend = I->getOperand(0);
83 Value *Divisor = I->getOperand(1);
85 if (isa<ConstantInt>(Divisor) ||
86 (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
87 // Operations with immediate values should have
88 // been solved and replaced during compile time.
92 // Basic Block is split before divide
93 BasicBlock *MainBB = &*I->getParent();
94 BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
96 // Add new basic block for slow divide operation
98 BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
99 SlowBB->moveBefore(SuccessorBB);
100 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
101 Value *SlowQuotientV;
102 Value *SlowRemainderV;
104 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
105 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
107 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
108 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
110 SlowBuilder.CreateBr(SuccessorBB);
112 // Add new basic block for fast divide operation
114 BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
115 FastBB->moveBefore(SlowBB);
116 IRBuilder<> FastBuilder(FastBB, FastBB->begin());
117 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
119 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
122 // udiv/urem because optimization only handles positive numbers
123 Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
125 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
127 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
129 Dividend->getType());
130 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
132 Dividend->getType());
133 FastBuilder.CreateBr(SuccessorBB);
135 // Phi nodes for result of div and rem
136 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
137 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
138 QuoPhi->addIncoming(SlowQuotientV, SlowBB);
139 QuoPhi->addIncoming(FastQuotientV, FastBB);
140 PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
141 RemPhi->addIncoming(SlowRemainderV, SlowBB);
142 RemPhi->addIncoming(FastRemainderV, FastBB);
144 // Replace I with appropriate phi node
146 I->replaceAllUsesWith(QuoPhi);
148 I->replaceAllUsesWith(RemPhi);
149 I->eraseFromParent();
151 // Combine operands into a single value with OR for value testing below
152 MainBB->getInstList().back().eraseFromParent();
153 IRBuilder<> MainBuilder(MainBB, MainBB->end());
154 Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
156 // BitMask is inverted to check if the operands are
157 // larger than the bypass type
158 uint64_t BitMask = ~BypassType->getBitMask();
159 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
161 // Compare operand values and branch
162 Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
163 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
164 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
166 // Cache phi nodes to be used later in place of other instances
167 // of div or rem with the same sign, dividend, and divisor
168 DivOpInfo Key(UseSignedOp, Dividend, Divisor);
169 DivPhiNodes Value(QuoPhi, RemPhi);
170 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
174 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
175 // the current BB if operands and operation are identical. Otherwise calls
176 // insertFastDiv to perform the optimization and caches the resulting dividend
178 static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
179 bool UseDivOp, bool UseSignedOp,
180 DivCacheTy &PerBBDivCache) {
181 // Get instruction operands
182 DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
183 DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
185 if (CacheI == PerBBDivCache.end()) {
186 // If previous instance does not exist, insert fast div
187 return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
190 // Replace operation value with previously generated phi node
191 DivPhiNodes &Value = CacheI->second;
193 // Replace all uses of div instruction with quotient phi node
194 I->replaceAllUsesWith(Value.Quotient);
196 // Replace all uses of rem instruction with remainder phi node
197 I->replaceAllUsesWith(Value.Remainder);
200 // Remove redundant operation
201 I->eraseFromParent();
205 // bypassSlowDivision - This optimization identifies DIV instructions in a BB
206 // that can be profitably bypassed and carried out with a shorter, faster
208 bool llvm::bypassSlowDivision(
209 BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
212 bool MadeChange = false;
213 Instruction* Next = &*BB->begin();
214 while (Next != nullptr) {
215 // We may add instructions immediately after I, but we want to skip over
217 Instruction* I = Next;
218 Next = Next->getNextNode();
220 // Get instruction details
221 unsigned Opcode = I->getOpcode();
222 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
223 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
224 bool UseSignedOp = Opcode == Instruction::SDiv ||
225 Opcode == Instruction::SRem;
227 // Only optimize div or rem ops
228 if (!UseDivOp && !UseRemOp)
231 // Skip division on vector types, only optimize integer instructions
232 if (!I->getType()->isIntegerTy())
235 // Get bitwidth of div/rem instruction
236 IntegerType *T = cast<IntegerType>(I->getType());
237 unsigned int bitwidth = T->getBitWidth();
239 // Continue if bitwidth is not bypassed
240 DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
241 if (BI == BypassWidths.end())
244 // Get type for div/rem instruction with bypass bitwidth
245 IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
247 MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);