//===- AggressiveInstCombine.cpp ------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the aggressive expression pattern combiner classes.
// Currently, it handles expression patterns for:
//  * Truncate instruction
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "AggressiveInstCombineInternal.h"
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/AggressiveInstCombine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "aggressive-instcombine"

namespace {
/// Contains expression pattern combiner logic.
/// This class provides both the logic to match expression patterns and the
/// logic to combine them. It differs from the InstCombiner class in that each
/// pattern combiner runs only once, as opposed to InstCombine's
/// multi-iteration scheme, which allows a pattern combiner to have higher
/// complexity than the O(1) required by the instruction combiner.
class AggressiveInstCombinerLegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  AggressiveInstCombinerLegacyPass() : FunctionPass(ID) {
    initializeAggressiveInstCombinerLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Run all expression pattern optimizations on the given \p F function.
  ///
  /// \param F function to optimize.
  /// \returns true if the IR is changed.
  bool runOnFunction(Function &F) override;
};
} // namespace

/// Match a pattern for a bitwise rotate operation that partially guards
/// against undefined behavior by branching around the rotation when the shift
/// amount is 0.
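/// For example, this is meant to catch the IR produced by a source-level
/// guarded rotate such as (illustrative):
///   Res = (Amt == 0) ? X : ((X << Amt) | (X >> (32 - Amt)));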
static bool foldGuardedRotateToFunnelShift(Instruction &I) {
  if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
    return false;

  // As with the one-use checks below, this is not strictly necessary, but we
  // are being cautious to avoid potential perf regressions on targets that
  // do not actually have a rotate instruction (where the funnel shift would be
  // expanded back into math/shift/logic ops).
  if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
    return false;

  // Match V to funnel shift left/right and capture the source operand and
  // shift amount in X and Y.
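  // On success, the lambda returns the corresponding funnel shift intrinsic
  // ID (Intrinsic::fshl or Intrinsic::fshr); on failure, it returns
  // Intrinsic::not_intrinsic.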
  auto matchRotate = [](Value *V, Value *&X, Value *&Y) {
    Value *L0, *L1, *R0, *R1;
    unsigned Width = V->getType()->getScalarSizeInBits();
    auto Sub = m_Sub(m_SpecificInt(Width), m_Value(R1));

    // rotate_left(X, Y) == (X << Y) | (X >> (Width - Y))
    auto RotL = m_OneUse(
        m_c_Or(m_Shl(m_Value(L0), m_Value(L1)), m_LShr(m_Value(R0), Sub)));
    if (RotL.match(V) && L0 == R0 && L1 == R1) {
      X = L0;
      Y = L1;
      return Intrinsic::fshl;
    }

    // rotate_right(X, Y) == (X >> Y) | (X << (Width - Y))
    auto RotR = m_OneUse(
        m_c_Or(m_LShr(m_Value(L0), m_Value(L1)), m_Shl(m_Value(R0), Sub)));
    if (RotR.match(V) && L0 == R0 && L1 == R1) {
      X = L0;
      Y = L1;
      return Intrinsic::fshr;
    }

    return Intrinsic::not_intrinsic;
  };

  // One phi operand must be a rotate operation, and the other phi operand must
  // be the source value of that rotate operation:
  // phi [ rotate(RotSrc, RotAmt), RotBB ], [ RotSrc, GuardBB ]
  PHINode &Phi = cast<PHINode>(I);
  Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
  Value *RotSrc, *RotAmt;
  Intrinsic::ID IID = matchRotate(P0, RotSrc, RotAmt);
  if (IID == Intrinsic::not_intrinsic || RotSrc != P1) {
    IID = matchRotate(P1, RotSrc, RotAmt);
    if (IID == Intrinsic::not_intrinsic || RotSrc != P0)
      return false;
    assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
           "Pattern must match funnel shift left or right");
  }

  // The incoming block with our source operand must be the "guard" block.
  // That must contain a cmp+branch to avoid the rotate when the shift amount
  // is equal to 0. The other incoming block is the block with the rotate.
  BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1);
  BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1);
  Instruction *TermI = GuardBB->getTerminator();
  BasicBlock *TrueBB, *FalseBB;
  ICmpInst::Predicate Pred;
  if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()), TrueBB,
                         FalseBB)))
    return false;

  BasicBlock *PhiBB = Phi.getParent();
  if (Pred != CmpInst::ICMP_EQ || TrueBB != PhiBB || FalseBB != RotBB)
    return false;

  // We matched a variation of this IR pattern:
  // GuardBB:
  //   %cmp = icmp eq i32 %RotAmt, 0
  //   br i1 %cmp, label %PhiBB, label %RotBB
  // RotBB:
  //   %sub = sub i32 32, %RotAmt
  //   %shr = lshr i32 %X, %sub
  //   %shl = shl i32 %X, %RotAmt
  //   %rot = or i32 %shr, %shl
  //   br label %PhiBB
  // PhiBB:
  //   %cond = phi i32 [ %rot, %RotBB ], [ %X, %GuardBB ]
  // -->
  // llvm.fshl.i32(i32 %X, i32 %X, i32 %RotAmt)
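  // A rotate is a funnel shift with both value operands equal: fshl(X, X, Y)
  // rotates X left by Y modulo the bit width, so the intrinsic call below is
  // well defined even when the shift amount is 0.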
  IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
  Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
  Phi.replaceAllUsesWith(Builder.CreateCall(F, {RotSrc, RotSrc, RotAmt}));
  return true;
}

/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
/// of 'and' ops, then we also need to capture the fact that we saw an
/// "and X, 1", so that's an extra return value for that case.
struct MaskOps {
  Value *Root;
  APInt Mask;
  bool MatchAndChain;
  bool FoundAnd1;

  MaskOps(unsigned BitWidth, bool MatchAnds)
      : Root(nullptr), Mask(APInt::getNullValue(BitWidth)),
        MatchAndChain(MatchAnds), FoundAnd1(false) {}
};

/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
/// chain of 'and' or 'or' instructions looking for shift ops of a common source
/// value. Examples:
///   or (or (or X, (X >> 3)), (X >> 5)), (X >> 8)
/// returns { X, 0x129 }
///   and (and (X >> 1), 1), (X >> 4)
/// returns { X, 0x12 }
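/// (In the first example, the bare X and the shifts by 3, 5, and 8 set mask
/// bits 0, 3, 5, and 8: 0x129. In the second, the shifts by 1 and 4 set bits
/// 1 and 4: 0x12.)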
static bool matchAndOrChain(Value *V, MaskOps &MOps) {
  Value *Op0, *Op1;
  if (MOps.MatchAndChain) {
    // Recurse through a chain of 'and' operands. This requires an extra check
    // vs. the 'or' matcher: we must find an "and X, 1" instruction somewhere
    // in the chain to know that all of the high bits are cleared.
    if (match(V, m_And(m_Value(Op0), m_One()))) {
      MOps.FoundAnd1 = true;
      return matchAndOrChain(Op0, MOps);
    }
    if (match(V, m_And(m_Value(Op0), m_Value(Op1))))
      return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
  } else {
    // Recurse through a chain of 'or' operands.
    if (match(V, m_Or(m_Value(Op0), m_Value(Op1))))
      return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
  }

  // We need a shift-right or a bare value representing a compare of bit 0 of
  // the original source operand.
  Value *Candidate;
  uint64_t BitIndex = 0;
  if (!match(V, m_LShr(m_Value(Candidate), m_ConstantInt(BitIndex))))
    Candidate = V;

  // Initialize result source operand.
  if (!MOps.Root)
    MOps.Root = Candidate;

  // The shift constant is out-of-range? This code hasn't been simplified.
  if (BitIndex >= MOps.Mask.getBitWidth())
    return false;

  // Fill in the mask bit derived from the shift constant.
  MOps.Mask.setBit(BitIndex);
  return MOps.Root == Candidate;
}

/// Match patterns that correspond to "any-bits-set" and "all-bits-set".
/// These will include a chain of 'or' or 'and'-shifted bits from a
/// common source value:
/// and (or  (lshr X, C), ...), 1 --> (X & CMask) != 0
/// and (and (lshr X, C), ...), 1 --> (X & CMask) == CMask
/// Note: "any-bits-clear" and "all-bits-clear" are variations of these patterns
/// that differ only with a final 'not' of the result. We expect that final
/// 'not' to be folded with the compare that we create here (invert predicate).
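/// For example (illustrative), an all-bits-set chain such as
///   ((X >> 2) & (X >> 5)) & 1
/// is replaced by zext((X & 0x24) == 0x24) in the original type.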
static bool foldAnyOrAllBitsSet(Instruction &I) {
  // The 'any-bits-set' ('or' chain) pattern is simpler to match because the
  // final "and X, 1" instruction must be the final op in the sequence.
  bool MatchAllBitsSet;
  if (match(&I, m_c_And(m_OneUse(m_And(m_Value(), m_Value())), m_Value())))
    MatchAllBitsSet = true;
  else if (match(&I, m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One())))
    MatchAllBitsSet = false;
  else
    return false;

  MaskOps MOps(I.getType()->getScalarSizeInBits(), MatchAllBitsSet);
  if (MatchAllBitsSet) {
    if (!matchAndOrChain(cast<BinaryOperator>(&I), MOps) || !MOps.FoundAnd1)
      return false;
  } else {
    if (!matchAndOrChain(cast<BinaryOperator>(&I)->getOperand(0), MOps))
      return false;
  }

  // The pattern was found. Create a masked compare that replaces all of the
  // shift and logic ops.
  IRBuilder<> Builder(&I);
  Constant *Mask = ConstantInt::get(I.getType(), MOps.Mask);
  Value *And = Builder.CreateAnd(MOps.Root, Mask);
  Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask)
                               : Builder.CreateIsNotNull(And);
  Value *Zext = Builder.CreateZExt(Cmp, I.getType());
  I.replaceAllUsesWith(Zext);
  return true;
}

/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;
    // Do not delete instructions under here and invalidate the iterator.
    // Walk the block backwards for efficiency. We're matching a chain of
    // use->defs, so we're more likely to succeed by starting from the bottom.
    // Also, we want to avoid matching partial patterns.
    // TODO: It would be more efficient if we removed dead instructions
    // iteratively in this loop rather than waiting until the end.
    for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
      MadeChange |= foldAnyOrAllBitsSet(I);
      MadeChange |= foldGuardedRotateToFunnelShift(I);
    }
  }

  // We're done with transforms, so remove dead instructions.
  if (MadeChange)
    for (BasicBlock &BB : F)
      SimplifyInstructionsInBlock(&BB);

  return MadeChange;
}

/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
static bool runImpl(Function &F, TargetLibraryInfo &TLI, DominatorTree &DT) {
  bool MadeChange = false;
  const DataLayout &DL = F.getParent()->getDataLayout();
  TruncInstCombine TIC(TLI, DL, DT);
  MadeChange |= TIC.run(F);
  MadeChange |= foldUnusualPatterns(F, DT);
  return MadeChange;
}

void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
    AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
}

bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  return runImpl(F, TLI, DT);
}

PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
                                                 FunctionAnalysisManager &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  if (!runImpl(F, TLI, DT)) {
    // No changes, all analyses are preserved.
    return PreservedAnalyses::all();
  }

  // Mark all the analyses that instcombine updates as preserved.
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<AAManager>();
  PA.preserve<GlobalsAA>();
  return PA;
}

char AggressiveInstCombinerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(AggressiveInstCombinerLegacyPass,
                      "aggressive-instcombine",
                      "Combine pattern based expressions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(AggressiveInstCombinerLegacyPass, "aggressive-instcombine",
                    "Combine pattern based expressions", false, false)

// Initialization Routines
void llvm::initializeAggressiveInstCombine(PassRegistry &Registry) {
  initializeAggressiveInstCombinerLegacyPassPass(Registry);
}

void LLVMInitializeAggressiveInstCombiner(LLVMPassRegistryRef R) {
  initializeAggressiveInstCombinerLegacyPassPass(*unwrap(R));
}

FunctionPass *llvm::createAggressiveInstCombinerPass() {
  return new AggressiveInstCombinerLegacyPass();
}

void LLVMAddAggressiveInstCombinerPass(LLVMPassManagerRef PM) {
  unwrap(PM)->add(createAggressiveInstCombinerPass());
}