contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp

   1 //===- DivRemPairs.cpp - Hoist/decompose division and remainder -*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This pass hoists and/or decomposes integer division and remainder
  10 // instructions to enable CFG improvements and better codegen.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/Transforms/Scalar/DivRemPairs.h"
  15 #include "llvm/ADT/DenseMap.h"
  16 #include "llvm/ADT/MapVector.h"
  17 #include "llvm/ADT/Statistic.h"
  18 #include "llvm/Analysis/GlobalsModRef.h"
  19 #include "llvm/Analysis/TargetTransformInfo.h"
  20 #include "llvm/IR/Dominators.h"
  21 #include "llvm/IR/Function.h"
  22 #include "llvm/Pass.h"
  23 #include "llvm/Support/DebugCounter.h"
  24 #include "llvm/Transforms/Scalar.h"
  25 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
  26
  27 using namespace llvm;
  28
  29 #define DEBUG_TYPE "div-rem-pairs"
  30 STATISTIC(NumPairs, "Number of div/rem pairs");
  31 STATISTIC(NumHoisted, "Number of instructions hoisted");
  32 STATISTIC(NumDecomposed, "Number of instructions decomposed");
  33 DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",
  34               "Controls transformations in div-rem-pairs pass");
  35
  36 /// A thin wrapper to store two values that we matched as div-rem pair.
  37 /// We want this extra indirection to avoid dealing with RAUW'ing the map keys.
  38 struct DivRemPairWorklistEntry {
  39   /// The actual udiv/sdiv instruction. Source of truth.
  40   AssertingVH<Instruction> DivInst;
  41
  42   /// The instruction that we have matched as a remainder instruction.
  43   /// Should only be used as Value, don't introspect it.
  44   AssertingVH<Instruction> RemInst;
  45
  46   DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_)
  47       : DivInst(DivInst_), RemInst(RemInst_) {
  48     assert((DivInst->getOpcode() == Instruction::UDiv ||
  49             DivInst->getOpcode() == Instruction::SDiv) &&
  50            "Not a division.");
  51     assert(DivInst->getType() == RemInst->getType() && "Types should match.");
  52     // We can't check anything else about remainder instruction,
  53     // it's not strictly required to be a urem/srem.
  54   }
  55
  56   /// The type for this pair, identical for both the div and rem.
  57   Type *getType() const { return DivInst->getType(); }
  58
  59   /// Is this pair signed or unsigned?
  60   bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; }
  61
  62   /// In this pair, what are the divident and divisor?
  63   Value *getDividend() const { return DivInst->getOperand(0); }
  64   Value *getDivisor() const { return DivInst->getOperand(1); }
  65 };
  66 using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>;
  67
  68 /// Find matching pairs of integer div/rem ops (they have the same numerator,
  69 /// denominator, and signedness). Place those pairs into a worklist for further
  70 /// processing. This indirection is needed because we have to use TrackingVH<>
  71 /// because we will be doing RAUW, and if one of the rem instructions we change
  72 /// happens to be an input to another div/rem in the maps, we'd have problems.
  73 static DivRemWorklistTy getWorklist(Function &F) {
  74   // Insert all divide and remainder instructions into maps keyed by their
  75   // operands and opcode (signed or unsigned).
  76   DenseMap<DivRemMapKey, Instruction *> DivMap;
  77   // Use a MapVector for RemMap so that instructions are moved/inserted in a
  78   // deterministic order.
  79   MapVector<DivRemMapKey, Instruction *> RemMap;
  80   for (auto &BB : F) {
  81     for (auto &I : BB) {
  82       if (I.getOpcode() == Instruction::SDiv)
  83         DivMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;
  84       else if (I.getOpcode() == Instruction::UDiv)
  85         DivMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;
  86       else if (I.getOpcode() == Instruction::SRem)
  87         RemMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;
  88       else if (I.getOpcode() == Instruction::URem)
  89         RemMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;
  90     }
  91   }
  92
  93   // We'll accumulate the matching pairs of div-rem instructions here.
  94   DivRemWorklistTy Worklist;
  95
  96   // We can iterate over either map because we are only looking for matched
  97   // pairs. Choose remainders for efficiency because they are usually even more
  98   // rare than division.
  99   for (auto &RemPair : RemMap) {
 100     // Find the matching division instruction from the division map.
 101     Instruction *DivInst = DivMap[RemPair.first];
 102     if (!DivInst)
 103       continue;
 104
 105     // We have a matching pair of div/rem instructions.
 106     NumPairs++;
 107     Instruction *RemInst = RemPair.second;
 108
 109     // Place it in the worklist.
 110     Worklist.emplace_back(DivInst, RemInst);
 111   }
 112
 113   return Worklist;
 114 }
 115
 116 /// Find matching pairs of integer div/rem ops (they have the same numerator,
 117 /// denominator, and signedness). If they exist in different basic blocks, bring
 118 /// them together by hoisting or replace the common division operation that is
 119 /// implicit in the remainder:
 120 /// X % Y <--> X - ((X / Y) * Y).
 121 ///
 122 /// We can largely ignore the normal safety and cost constraints on speculation
 123 /// of these ops when we find a matching pair. This is because we are already
 124 /// guaranteed that any exceptions and most cost are already incurred by the
 125 /// first member of the pair.
 126 ///
 127 /// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
 128 /// SimplifyCFG, but it's split off on its own because it's different enough
 129 /// that it doesn't quite match the stated objectives of those passes.
 130 static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
 131                            const DominatorTree &DT) {
 132   bool Changed = false;
 133
 134   // Get the matching pairs of div-rem instructions. We want this extra
 135   // indirection to avoid dealing with having to RAUW the keys of the maps.
 136   DivRemWorklistTy Worklist = getWorklist(F);
 137
 138   // Process each entry in the worklist.
 139   for (DivRemPairWorklistEntry &E : Worklist) {
 140     bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned());
 141
 142     auto &DivInst = E.DivInst;
 143     auto &RemInst = E.RemInst;
 144
 145     // If the target supports div+rem and the instructions are in the same block
 146     // already, there's nothing to do. The backend should handle this. If the
 147     // target does not support div+rem, then we will decompose the rem.
 148     if (HasDivRemOp && RemInst->getParent() == DivInst->getParent())
 149       continue;
 150
 151     bool DivDominates = DT.dominates(DivInst, RemInst);
 152     if (!DivDominates && !DT.dominates(RemInst, DivInst))
 153       continue;
 154
 155     if (!DebugCounter::shouldExecute(DRPCounter))
 156       continue;
 157
 158     if (HasDivRemOp) {
 159       // The target has a single div/rem operation. Hoist the lower instruction
 160       // to make the matched pair visible to the backend.
 161       if (DivDominates)
 162         RemInst->moveAfter(DivInst);
 163       else
 164         DivInst->moveAfter(RemInst);
 165       NumHoisted++;
 166     } else {
 167       // The target does not have a single div/rem operation. Decompose the
 168       // remainder calculation as:
 169       // X % Y --> X - ((X / Y) * Y).
 170       Value *X = E.getDividend();
 171       Value *Y = E.getDivisor();
 172       Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);
 173       Instruction *Sub = BinaryOperator::CreateSub(X, Mul);
 174
 175       // If the remainder dominates, then hoist the division up to that block:
 176       //
 177       // bb1:
 178       //   %rem = srem %x, %y
 179       // bb2:
 180       //   %div = sdiv %x, %y
 181       // -->
 182       // bb1:
 183       //   %div = sdiv %x, %y
 184       //   %mul = mul %div, %y
 185       //   %rem = sub %x, %mul
 186       //
 187       // If the division dominates, it's already in the right place. The mul+sub
 188       // will be in a different block because we don't assume that they are
 189       // cheap to speculatively execute:
 190       //
 191       // bb1:
 192       //   %div = sdiv %x, %y
 193       // bb2:
 194       //   %rem = srem %x, %y
 195       // -->
 196       // bb1:
 197       //   %div = sdiv %x, %y
 198       // bb2:
 199       //   %mul = mul %div, %y
 200       //   %rem = sub %x, %mul
 201       //
 202       // If the div and rem are in the same block, we do the same transform,
 203       // but any code movement would be within the same block.
 204
 205       if (!DivDominates)
 206         DivInst->moveBefore(RemInst);
 207       Mul->insertAfter(RemInst);
 208       Sub->insertAfter(Mul);
 209
 210       // Now kill the explicit remainder. We have replaced it with:
 211       // (sub X, (mul (div X, Y), Y)
 212       Sub->setName(RemInst->getName() + ".decomposed");
 213       Instruction *OrigRemInst = RemInst;
 214       // Update AssertingVH<> with new instruction so it doesn't assert.
 215       RemInst = Sub;
 216       // And replace the original instruction with the new one.
 217       OrigRemInst->replaceAllUsesWith(Sub);
 218       OrigRemInst->eraseFromParent();
 219       NumDecomposed++;
 220     }
 221     Changed = true;
 222   }
 223
 224   return Changed;
 225 }
 226
 227 // Pass manager boilerplate below here.
 228
 229 namespace {
 230 struct DivRemPairsLegacyPass : public FunctionPass {
 231   static char ID;
 232   DivRemPairsLegacyPass() : FunctionPass(ID) {
 233     initializeDivRemPairsLegacyPassPass(*PassRegistry::getPassRegistry());
 234   }
 235
 236   void getAnalysisUsage(AnalysisUsage &AU) const override {
 237     AU.addRequired<DominatorTreeWrapperPass>();
 238     AU.addRequired<TargetTransformInfoWrapperPass>();
 239     AU.setPreservesCFG();
 240     AU.addPreserved<DominatorTreeWrapperPass>();
 241     AU.addPreserved<GlobalsAAWrapperPass>();
 242     FunctionPass::getAnalysisUsage(AU);
 243   }
 244
 245   bool runOnFunction(Function &F) override {
 246     if (skipFunction(F))
 247       return false;
 248     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 249     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
 250     return optimizeDivRem(F, TTI, DT);
 251   }
 252 };
 253 } // namespace
 254
 255 char DivRemPairsLegacyPass::ID = 0;
 256 INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs",
 257                       "Hoist/decompose integer division and remainder", false,
 258                       false)
 259 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 260 INITIALIZE_PASS_END(DivRemPairsLegacyPass, "div-rem-pairs",
 261                     "Hoist/decompose integer division and remainder", false,
 262                     false)
 263 FunctionPass *llvm::createDivRemPairsPass() {
 264   return new DivRemPairsLegacyPass();
 265 }
 266
 267 PreservedAnalyses DivRemPairsPass::run(Function &F,
 268                                        FunctionAnalysisManager &FAM) {
 269   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
 270   DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
 271   if (!optimizeDivRem(F, TTI, DT))
 272     return PreservedAnalyses::all();
 273   // TODO: This pass just hoists/replaces math ops - all analyses are preserved?
 274   PreservedAnalyses PA;
 275   PA.preserveSet<CFGAnalyses>();
 276   PA.preserve<GlobalsAA>();
 277   return PA;
 278 }