contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp

   1 //===- DivRemPairs.cpp - Hoist/[dr]ecompose division and remainder --------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This pass hoists and/or decomposes/recomposes integer division and remainder
  10 // instructions to enable CFG improvements and better codegen.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/Transforms/Scalar/DivRemPairs.h"
  15 #include "llvm/ADT/DenseMap.h"
  16 #include "llvm/ADT/MapVector.h"
  17 #include "llvm/ADT/Statistic.h"
  18 #include "llvm/Analysis/GlobalsModRef.h"
  19 #include "llvm/Analysis/TargetTransformInfo.h"
  20 #include "llvm/IR/Dominators.h"
  21 #include "llvm/IR/Function.h"
  22 #include "llvm/IR/PatternMatch.h"
  23 #include "llvm/InitializePasses.h"
  24 #include "llvm/Pass.h"
  25 #include "llvm/Support/DebugCounter.h"
  26 #include "llvm/Transforms/Scalar.h"
  27 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
  28
  29 using namespace llvm;
  30 using namespace llvm::PatternMatch;
  31
  32 #define DEBUG_TYPE "div-rem-pairs"
  33 STATISTIC(NumPairs, "Number of div/rem pairs");
  34 STATISTIC(NumRecomposed, "Number of instructions recomposed");
  35 STATISTIC(NumHoisted, "Number of instructions hoisted");
  36 STATISTIC(NumDecomposed, "Number of instructions decomposed");
  37 DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",
  38               "Controls transformations in div-rem-pairs pass");
  39
namespace {
/// Result of recognizing a remainder that is written in its expanded form,
/// X - ((X ?/ Y) * Y), as filled in by matchExpandedRem().
struct ExpandedMatch {
  /// Signedness, dividend and divisor of the division found inside the
  /// expanded expression.
  DivRemMapKey Key;
  /// The instruction the match started from (the outermost subtraction).
  Instruction *Value;
};
} // namespace
  46
  47 /// See if we can match: (which is the form we expand into)
  48 ///   X - ((X ?/ Y) * Y)
  49 /// which is equivalent to:
  50 ///   X ?% Y
  51 static llvm::Optional<ExpandedMatch> matchExpandedRem(Instruction &I) {
  52   Value *Dividend, *XroundedDownToMultipleOfY;
  53   if (!match(&I, m_Sub(m_Value(Dividend), m_Value(XroundedDownToMultipleOfY))))
  54     return llvm::None;
  55
  56   Value *Divisor;
  57   Instruction *Div;
  58   // Look for  ((X / Y) * Y)
  59   if (!match(
  60           XroundedDownToMultipleOfY,
  61           m_c_Mul(m_CombineAnd(m_IDiv(m_Specific(Dividend), m_Value(Divisor)),
  62                                m_Instruction(Div)),
  63                   m_Deferred(Divisor))))
  64     return llvm::None;
  65
  66   ExpandedMatch M;
  67   M.Key.SignedOp = Div->getOpcode() == Instruction::SDiv;
  68   M.Key.Dividend = Dividend;
  69   M.Key.Divisor = Divisor;
  70   M.Value = &I;
  71   return M;
  72 }
  73
  74 /// A thin wrapper to store two values that we matched as div-rem pair.
  75 /// We want this extra indirection to avoid dealing with RAUW'ing the map keys.
  76 struct DivRemPairWorklistEntry {
  77   /// The actual udiv/sdiv instruction. Source of truth.
  78   AssertingVH<Instruction> DivInst;
  79
  80   /// The instruction that we have matched as a remainder instruction.
  81   /// Should only be used as Value, don't introspect it.
  82   AssertingVH<Instruction> RemInst;
  83
  84   DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_)
  85       : DivInst(DivInst_), RemInst(RemInst_) {
  86     assert((DivInst->getOpcode() == Instruction::UDiv ||
  87             DivInst->getOpcode() == Instruction::SDiv) &&
  88            "Not a division.");
  89     assert(DivInst->getType() == RemInst->getType() && "Types should match.");
  90     // We can't check anything else about remainder instruction,
  91     // it's not strictly required to be a urem/srem.
  92   }
  93
  94   /// The type for this pair, identical for both the div and rem.
  95   Type *getType() const { return DivInst->getType(); }
  96
  97   /// Is this pair signed or unsigned?
  98   bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; }
  99
 100   /// In this pair, what are the divident and divisor?
 101   Value *getDividend() const { return DivInst->getOperand(0); }
 102   Value *getDivisor() const { return DivInst->getOperand(1); }
 103
 104   bool isRemExpanded() const {
 105     switch (RemInst->getOpcode()) {
 106     case Instruction::SRem:
 107     case Instruction::URem:
 108       return false; // single 'rem' instruction - unexpanded form.
 109     default:
 110       return true; // anything else means we have remainder in expanded form.
 111     }
 112   }
 113 };
 114 using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>;
 115
 116 /// Find matching pairs of integer div/rem ops (they have the same numerator,
 117 /// denominator, and signedness). Place those pairs into a worklist for further
 118 /// processing. This indirection is needed because we have to use TrackingVH<>
 119 /// because we will be doing RAUW, and if one of the rem instructions we change
 120 /// happens to be an input to another div/rem in the maps, we'd have problems.
 121 static DivRemWorklistTy getWorklist(Function &F) {
 122   // Insert all divide and remainder instructions into maps keyed by their
 123   // operands and opcode (signed or unsigned).
 124   DenseMap<DivRemMapKey, Instruction *> DivMap;
 125   // Use a MapVector for RemMap so that instructions are moved/inserted in a
 126   // deterministic order.
 127   MapVector<DivRemMapKey, Instruction *> RemMap;
 128   for (auto &BB : F) {
 129     for (auto &I : BB) {
 130       if (I.getOpcode() == Instruction::SDiv)
 131         DivMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;
 132       else if (I.getOpcode() == Instruction::UDiv)
 133         DivMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;
 134       else if (I.getOpcode() == Instruction::SRem)
 135         RemMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;
 136       else if (I.getOpcode() == Instruction::URem)
 137         RemMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;
 138       else if (auto Match = matchExpandedRem(I))
 139         RemMap[Match->Key] = Match->Value;
 140     }
 141   }
 142
 143   // We'll accumulate the matching pairs of div-rem instructions here.
 144   DivRemWorklistTy Worklist;
 145
 146   // We can iterate over either map because we are only looking for matched
 147   // pairs. Choose remainders for efficiency because they are usually even more
 148   // rare than division.
 149   for (auto &RemPair : RemMap) {
 150     // Find the matching division instruction from the division map.
 151     Instruction *DivInst = DivMap[RemPair.first];
 152     if (!DivInst)
 153       continue;
 154
 155     // We have a matching pair of div/rem instructions.
 156     NumPairs++;
 157     Instruction *RemInst = RemPair.second;
 158
 159     // Place it in the worklist.
 160     Worklist.emplace_back(DivInst, RemInst);
 161   }
 162
 163   return Worklist;
 164 }
 165
 166 /// Find matching pairs of integer div/rem ops (they have the same numerator,
 167 /// denominator, and signedness). If they exist in different basic blocks, bring
 168 /// them together by hoisting or replace the common division operation that is
 169 /// implicit in the remainder:
 170 /// X % Y <--> X - ((X / Y) * Y).
 171 ///
 172 /// We can largely ignore the normal safety and cost constraints on speculation
 173 /// of these ops when we find a matching pair. This is because we are already
 174 /// guaranteed that any exceptions and most cost are already incurred by the
 175 /// first member of the pair.
 176 ///
 177 /// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
 178 /// SimplifyCFG, but it's split off on its own because it's different enough
 179 /// that it doesn't quite match the stated objectives of those passes.
 180 static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
 181                            const DominatorTree &DT) {
 182   bool Changed = false;
 183
 184   // Get the matching pairs of div-rem instructions. We want this extra
 185   // indirection to avoid dealing with having to RAUW the keys of the maps.
 186   DivRemWorklistTy Worklist = getWorklist(F);
 187
 188   // Process each entry in the worklist.
 189   for (DivRemPairWorklistEntry &E : Worklist) {
 190     if (!DebugCounter::shouldExecute(DRPCounter))
 191       continue;
 192
 193     bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned());
 194
 195     auto &DivInst = E.DivInst;
 196     auto &RemInst = E.RemInst;
 197
 198     const bool RemOriginallyWasInExpandedForm = E.isRemExpanded();
 199     (void)RemOriginallyWasInExpandedForm; // suppress unused variable warning
 200
 201     if (HasDivRemOp && E.isRemExpanded()) {
 202       // The target supports div+rem but the rem is expanded.
 203       // We should recompose it first.
 204       Value *X = E.getDividend();
 205       Value *Y = E.getDivisor();
 206       Instruction *RealRem = E.isSigned() ? BinaryOperator::CreateSRem(X, Y)
 207                                           : BinaryOperator::CreateURem(X, Y);
 208       // Note that we place it right next to the original expanded instruction,
 209       // and letting further handling to move it if needed.
 210       RealRem->setName(RemInst->getName() + ".recomposed");
 211       RealRem->insertAfter(RemInst);
 212       Instruction *OrigRemInst = RemInst;
 213       // Update AssertingVH<> with new instruction so it doesn't assert.
 214       RemInst = RealRem;
 215       // And replace the original instruction with the new one.
 216       OrigRemInst->replaceAllUsesWith(RealRem);
 217       OrigRemInst->eraseFromParent();
 218       NumRecomposed++;
 219       // Note that we have left ((X / Y) * Y) around.
 220       // If it had other uses we could rewrite it as X - X % Y
 221     }
 222
 223     assert((!E.isRemExpanded() || !HasDivRemOp) &&
 224            "*If* the target supports div-rem, then by now the RemInst *is* "
 225            "Instruction::[US]Rem.");
 226
 227     // If the target supports div+rem and the instructions are in the same block
 228     // already, there's nothing to do. The backend should handle this. If the
 229     // target does not support div+rem, then we will decompose the rem.
 230     if (HasDivRemOp && RemInst->getParent() == DivInst->getParent())
 231       continue;
 232
 233     bool DivDominates = DT.dominates(DivInst, RemInst);
 234     if (!DivDominates && !DT.dominates(RemInst, DivInst)) {
 235       // We have matching div-rem pair, but they are in two different blocks,
 236       // neither of which dominates one another.
 237       // FIXME: We could hoist both ops to the common predecessor block?
 238       continue;
 239     }
 240
 241     // The target does not have a single div/rem operation,
 242     // and the rem is already in expanded form. Nothing to do.
 243     if (!HasDivRemOp && E.isRemExpanded())
 244       continue;
 245
 246     if (HasDivRemOp) {
 247       // The target has a single div/rem operation. Hoist the lower instruction
 248       // to make the matched pair visible to the backend.
 249       if (DivDominates)
 250         RemInst->moveAfter(DivInst);
 251       else
 252         DivInst->moveAfter(RemInst);
 253       NumHoisted++;
 254     } else {
 255       // The target does not have a single div/rem operation,
 256       // and the rem is *not* in a already-expanded form.
 257       // Decompose the remainder calculation as:
 258       // X % Y --> X - ((X / Y) * Y).
 259
 260       assert(!RemOriginallyWasInExpandedForm &&
 261              "We should not be expanding if the rem was in expanded form to "
 262              "begin with.");
 263
 264       Value *X = E.getDividend();
 265       Value *Y = E.getDivisor();
 266       Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);
 267       Instruction *Sub = BinaryOperator::CreateSub(X, Mul);
 268
 269       // If the remainder dominates, then hoist the division up to that block:
 270       //
 271       // bb1:
 272       //   %rem = srem %x, %y
 273       // bb2:
 274       //   %div = sdiv %x, %y
 275       // -->
 276       // bb1:
 277       //   %div = sdiv %x, %y
 278       //   %mul = mul %div, %y
 279       //   %rem = sub %x, %mul
 280       //
 281       // If the division dominates, it's already in the right place. The mul+sub
 282       // will be in a different block because we don't assume that they are
 283       // cheap to speculatively execute:
 284       //
 285       // bb1:
 286       //   %div = sdiv %x, %y
 287       // bb2:
 288       //   %rem = srem %x, %y
 289       // -->
 290       // bb1:
 291       //   %div = sdiv %x, %y
 292       // bb2:
 293       //   %mul = mul %div, %y
 294       //   %rem = sub %x, %mul
 295       //
 296       // If the div and rem are in the same block, we do the same transform,
 297       // but any code movement would be within the same block.
 298
 299       if (!DivDominates)
 300         DivInst->moveBefore(RemInst);
 301       Mul->insertAfter(RemInst);
 302       Sub->insertAfter(Mul);
 303
 304       // Now kill the explicit remainder. We have replaced it with:
 305       // (sub X, (mul (div X, Y), Y)
 306       Sub->setName(RemInst->getName() + ".decomposed");
 307       Instruction *OrigRemInst = RemInst;
 308       // Update AssertingVH<> with new instruction so it doesn't assert.
 309       RemInst = Sub;
 310       // And replace the original instruction with the new one.
 311       OrigRemInst->replaceAllUsesWith(Sub);
 312       OrigRemInst->eraseFromParent();
 313       NumDecomposed++;
 314     }
 315     Changed = true;
 316   }
 317
 318   return Changed;
 319 }
 320
 321 // Pass manager boilerplate below here.
 322
 323 namespace {
 324 struct DivRemPairsLegacyPass : public FunctionPass {
 325   static char ID;
 326   DivRemPairsLegacyPass() : FunctionPass(ID) {
 327     initializeDivRemPairsLegacyPassPass(*PassRegistry::getPassRegistry());
 328   }
 329
 330   void getAnalysisUsage(AnalysisUsage &AU) const override {
 331     AU.addRequired<DominatorTreeWrapperPass>();
 332     AU.addRequired<TargetTransformInfoWrapperPass>();
 333     AU.setPreservesCFG();
 334     AU.addPreserved<DominatorTreeWrapperPass>();
 335     AU.addPreserved<GlobalsAAWrapperPass>();
 336     FunctionPass::getAnalysisUsage(AU);
 337   }
 338
 339   bool runOnFunction(Function &F) override {
 340     if (skipFunction(F))
 341       return false;
 342     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 343     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
 344     return optimizeDivRem(F, TTI, DT);
 345   }
 346 };
 347 } // namespace
 348
 349 char DivRemPairsLegacyPass::ID = 0;
 350 INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs",
 351                       "Hoist/decompose integer division and remainder", false,
 352                       false)
 353 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 354 INITIALIZE_PASS_END(DivRemPairsLegacyPass, "div-rem-pairs",
 355                     "Hoist/decompose integer division and remainder", false,
 356                     false)
 357 FunctionPass *llvm::createDivRemPairsPass() {
 358   return new DivRemPairsLegacyPass();
 359 }
 360
 361 PreservedAnalyses DivRemPairsPass::run(Function &F,
 362                                        FunctionAnalysisManager &FAM) {
 363   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
 364   DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
 365   if (!optimizeDivRem(F, TTI, DT))
 366     return PreservedAnalyses::all();
 367   // TODO: This pass just hoists/replaces math ops - all analyses are preserved?
 368   PreservedAnalyses PA;
 369   PA.preserveSet<CFGAnalyses>();
 370   PA.preserve<GlobalsAA>();
 371   return PA;
 372 }