contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

   1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// This file defines the LoopVectorizationLegality class. Original code
  12 /// in Loop Vectorizer has been moved out to its own file for modularity
  13 /// and reusability.
  14 ///
  15 /// Currently, it works for innermost loop vectorization. Extending this to
  16 /// outer loop vectorization is a TODO item.
  17 ///
  18 /// Also provides:
  19 /// 1) LoopVectorizeHints class which keeps a number of loop annotations
  20 /// locally for easy look up. It has the ability to write them back as
  21 /// loop metadata, upon request.
  22 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
  23 /// of reporting useful failure to vectorize message.
  24 //
  25 //===----------------------------------------------------------------------===//
  26
  27 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
  28 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
  29
  30 #include "llvm/ADT/MapVector.h"
  31 #include "llvm/Analysis/LoopAccessAnalysis.h"
  32 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  33 #include "llvm/Transforms/Utils/LoopUtils.h"
  34
  35 namespace llvm {
  36
  37 /// Create an analysis remark that explains why vectorization failed
  38 ///
  39 /// \p PassName is the name of the pass (e.g. can be AlwaysPrint).  \p
  40 /// RemarkName is the identifier for the remark.  If \p I is passed it is an
  41 /// instruction that prevents vectorization.  Otherwise \p TheLoop is used for
  42 /// the location of the remark.  \return the remark object that can be
  43 /// streamed to.
  44 OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
  45                                                   StringRef RemarkName,
  46                                                   Loop *TheLoop,
  47                                                   Instruction *I = nullptr);
  48
  49 /// Utility class for getting and setting loop vectorizer hints in the form
  50 /// of loop metadata.
  51 /// This class keeps a number of loop annotations locally (as member variables)
  52 /// and can, upon request, write them back as metadata on the loop. It will
  53 /// initially scan the loop for existing metadata, and will update the local
  54 /// values based on information in the loop.
  55 /// We cannot write all values to metadata, as the mere presence of some info,
  56 /// for example 'force', means a decision has been made. So, we need to be
  57 /// careful NOT to add them if the user hasn't specifically asked so.
  58 class LoopVectorizeHints {
  59   enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED };
  60
  61   /// Hint - associates name and validation with the hint value.
  62   struct Hint {
  63     const char *Name;
  64     unsigned Value; // This may have to change for non-numeric values.
  65     HintKind Kind;
  66
  67     Hint(const char *Name, unsigned Value, HintKind Kind)
  68         : Name(Name), Value(Value), Kind(Kind) {}
  69
  70     bool validate(unsigned Val);
  71   };
  72
  73   /// Vectorization width.
  74   Hint Width;
  75
  76   /// Vectorization interleave factor.
  77   Hint Interleave;
  78
  79   /// Vectorization forced
  80   Hint Force;
  81
  82   /// Already Vectorized
  83   Hint IsVectorized;
  84
  85   /// Return the loop metadata prefix.
  86   static StringRef Prefix() { return "llvm.loop."; }
  87
  88   /// True if there is any unsafe math in the loop.
  89   bool PotentiallyUnsafe = false;
  90
  91 public:
  92   enum ForceKind {
  93     FK_Undefined = -1, ///< Not selected.
  94     FK_Disabled = 0,   ///< Forcing disabled.
  95     FK_Enabled = 1,    ///< Forcing enabled.
  96   };
  97
  98   LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
  99                      OptimizationRemarkEmitter &ORE);
 100
 101   /// Mark the loop L as already vectorized by setting the width to 1.
 102   void setAlreadyVectorized() {
 103     IsVectorized.Value = 1;
 104     Hint Hints[] = {IsVectorized};
 105     writeHintsToMetadata(Hints);
 106   }
 107
 108   bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const;
 109
 110   /// Dumps all the hint information.
 111   void emitRemarkWithHints() const;
 112
 113   unsigned getWidth() const { return Width.Value; }
 114   unsigned getInterleave() const { return Interleave.Value; }
 115   unsigned getIsVectorized() const { return IsVectorized.Value; }
 116   enum ForceKind getForce() const { return (ForceKind)Force.Value; }
 117
 118   /// If hints are provided that force vectorization, use the AlwaysPrint
 119   /// pass name to force the frontend to print the diagnostic.
 120   const char *vectorizeAnalysisPassName() const;
 121
 122   bool allowReordering() const {
 123     // When enabling loop hints are provided we allow the vectorizer to change
 124     // the order of operations that is given by the scalar loop. This is not
 125     // enabled by default because can be unsafe or inefficient. For example,
 126     // reordering floating-point operations will change the way round-off
 127     // error accumulates in the loop.
 128     return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
 129   }
 130
 131   bool isPotentiallyUnsafe() const {
 132     // Avoid FP vectorization if the target is unsure about proper support.
 133     // This may be related to the SIMD unit in the target not handling
 134     // IEEE 754 FP ops properly, or bad single-to-double promotions.
 135     // Otherwise, a sequence of vectorized loops, even without reduction,
 136     // could lead to different end results on the destination vectors.
 137     return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
 138   }
 139
 140   void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
 141
 142 private:
 143   /// Find hints specified in the loop metadata and update local values.
 144   void getHintsFromMetadata();
 145
 146   /// Checks string hint with one operand and set value if valid.
 147   void setHint(StringRef Name, Metadata *Arg);
 148
 149   /// Create a new hint from name / value pair.
 150   MDNode *createHintMetadata(StringRef Name, unsigned V) const;
 151
 152   /// Matches metadata with hint name.
 153   bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes);
 154
 155   /// Sets current hints into loop metadata, keeping other values intact.
 156   void writeHintsToMetadata(ArrayRef<Hint> HintTypes);
 157
 158   /// The loop these hints belong to.
 159   const Loop *TheLoop;
 160
 161   /// Interface to emit optimization remarks.
 162   OptimizationRemarkEmitter &ORE;
 163 };
 164
 165 /// This holds vectorization requirements that must be verified late in
 166 /// the process. The requirements are set by legalize and costmodel. Once
 167 /// vectorization has been determined to be possible and profitable the
 168 /// requirements can be verified by looking for metadata or compiler options.
 169 /// For example, some loops require FP commutativity which is only allowed if
 170 /// vectorization is explicitly specified or if the fast-math compiler option
 171 /// has been provided.
 172 /// Late evaluation of these requirements allows helpful diagnostics to be
 173 /// composed that tells the user what need to be done to vectorize the loop. For
 174 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
 175 /// evaluation should be used only when diagnostics can generated that can be
 176 /// followed by a non-expert user.
 177 class LoopVectorizationRequirements {
 178 public:
 179   LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {}
 180
 181   void addUnsafeAlgebraInst(Instruction *I) {
 182     // First unsafe algebra instruction.
 183     if (!UnsafeAlgebraInst)
 184       UnsafeAlgebraInst = I;
 185   }
 186
 187   void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
 188
 189   bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
 190
 191 private:
 192   unsigned NumRuntimePointerChecks = 0;
 193   Instruction *UnsafeAlgebraInst = nullptr;
 194
 195   /// Interface to emit optimization remarks.
 196   OptimizationRemarkEmitter &ORE;
 197 };
 198
 199 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
 200 /// to what vectorization factor.
 201 /// This class does not look at the profitability of vectorization, only the
 202 /// legality. This class has two main kinds of checks:
 203 /// * Memory checks - The code in canVectorizeMemory checks if vectorization
 204 ///   will change the order of memory accesses in a way that will change the
 205 ///   correctness of the program.
 206 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
 207 /// checks for a number of different conditions, such as the availability of a
 208 /// single induction variable, that all types are supported and vectorize-able,
 209 /// etc. This code reflects the capabilities of InnerLoopVectorizer.
 210 /// This class is also used by InnerLoopVectorizer for identifying
 211 /// induction variable and the different reduction variables.
 212 class LoopVectorizationLegality {
 213 public:
 214   LoopVectorizationLegality(
 215       Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
 216       TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F,
 217       std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI,
 218       OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R,
 219       LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
 220       : TheLoop(L), LI(LI), PSE(PSE), TLI(TLI), DT(DT), GetLAA(GetLAA),
 221         ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
 222
 223   /// ReductionList contains the reduction descriptors for all
 224   /// of the reductions that were found in the loop.
 225   using ReductionList = DenseMap<PHINode *, RecurrenceDescriptor>;
 226
 227   /// InductionList saves induction variables and maps them to the
 228   /// induction descriptor.
 229   using InductionList = MapVector<PHINode *, InductionDescriptor>;
 230
 231   /// RecurrenceSet contains the phi nodes that are recurrences other than
 232   /// inductions and reductions.
 233   using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
 234
 235   /// Returns true if it is legal to vectorize this loop.
 236   /// This does not mean that it is profitable to vectorize this
 237   /// loop, only that it is legal to do so.
 238   /// Temporarily taking UseVPlanNativePath parameter. If true, take
 239   /// the new code path being implemented for outer loop vectorization
 240   /// (should be functional for inner loop vectorization) based on VPlan.
 241   /// If false, good old LV code.
 242   bool canVectorize(bool UseVPlanNativePath);
 243
 244   /// Returns the primary induction variable.
 245   PHINode *getPrimaryInduction() { return PrimaryInduction; }
 246
 247   /// Returns the reduction variables found in the loop.
 248   ReductionList *getReductionVars() { return &Reductions; }
 249
 250   /// Returns the induction variables found in the loop.
 251   InductionList *getInductionVars() { return &Inductions; }
 252
 253   /// Return the first-order recurrences found in the loop.
 254   RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
 255
 256   /// Return the set of instructions to sink to handle first-order recurrences.
 257   DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
 258
 259   /// Returns the widest induction type.
 260   Type *getWidestInductionType() { return WidestIndTy; }
 261
 262   /// Returns True if V is a Phi node of an induction variable in this loop.
 263   bool isInductionPhi(const Value *V);
 264
 265   /// Returns True if V is a cast that is part of an induction def-use chain,
 266   /// and had been proven to be redundant under a runtime guard (in other
 267   /// words, the cast has the same SCEV expression as the induction phi).
 268   bool isCastedInductionVariable(const Value *V);
 269
 270   /// Returns True if V can be considered as an induction variable in this
 271   /// loop. V can be the induction phi, or some redundant cast in the def-use
 272   /// chain of the inducion phi.
 273   bool isInductionVariable(const Value *V);
 274
 275   /// Returns True if PN is a reduction variable in this loop.
 276   bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
 277
 278   /// Returns True if Phi is a first-order recurrence in this loop.
 279   bool isFirstOrderRecurrence(const PHINode *Phi);
 280
 281   /// Return true if the block BB needs to be predicated in order for the loop
 282   /// to be vectorized.
 283   bool blockNeedsPredication(BasicBlock *BB);
 284
 285   /// Check if this pointer is consecutive when vectorizing. This happens
 286   /// when the last index of the GEP is the induction variable, or that the
 287   /// pointer itself is an induction variable.
 288   /// This check allows us to vectorize A[idx] into a wide load/store.
 289   /// Returns:
 290   /// 0 - Stride is unknown or non-consecutive.
 291   /// 1 - Address is consecutive.
 292   /// -1 - Address is consecutive, and decreasing.
 293   /// NOTE: This method must only be used before modifying the original scalar
 294   /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
 295   int isConsecutivePtr(Value *Ptr);
 296
 297   /// Returns true if the value V is uniform within the loop.
 298   bool isUniform(Value *V);
 299
 300   /// Returns the information that we collected about runtime memory check.
 301   const RuntimePointerChecking *getRuntimePointerChecking() const {
 302     return LAI->getRuntimePointerChecking();
 303   }
 304
 305   const LoopAccessInfo *getLAI() const { return LAI; }
 306
 307   unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
 308
 309   uint64_t getMaxSafeRegisterWidth() const {
 310     return LAI->getDepChecker().getMaxSafeRegisterWidth();
 311   }
 312
 313   bool hasStride(Value *V) { return LAI->hasStride(V); }
 314
 315   /// Returns true if vector representation of the instruction \p I
 316   /// requires mask.
 317   bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
 318
 319   unsigned getNumStores() const { return LAI->getNumStores(); }
 320   unsigned getNumLoads() const { return LAI->getNumLoads(); }
 321
 322   // Returns true if the NoNaN attribute is set on the function.
 323   bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
 324
 325 private:
 326   /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
 327   /// its nested loops are considered legal for vectorization. These legal
 328   /// checks are common for inner and outer loop vectorization.
 329   /// Temporarily taking UseVPlanNativePath parameter. If true, take
 330   /// the new code path being implemented for outer loop vectorization
 331   /// (should be functional for inner loop vectorization) based on VPlan.
 332   /// If false, good old LV code.
 333   bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
 334
 335   /// Return true if the pre-header, exiting and latch blocks of \p Lp
 336   /// (non-recursive) are considered legal for vectorization.
 337   /// Temporarily taking UseVPlanNativePath parameter. If true, take
 338   /// the new code path being implemented for outer loop vectorization
 339   /// (should be functional for inner loop vectorization) based on VPlan.
 340   /// If false, good old LV code.
 341   bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
 342
 343   /// Check if a single basic block loop is vectorizable.
 344   /// At this point we know that this is a loop with a constant trip count
 345   /// and we only need to check individual instructions.
 346   bool canVectorizeInstrs();
 347
 348   /// When we vectorize loops we may change the order in which
 349   /// we read and write from memory. This method checks if it is
 350   /// legal to vectorize the code, considering only memory constrains.
 351   /// Returns true if the loop is vectorizable
 352   bool canVectorizeMemory();
 353
 354   /// Return true if we can vectorize this loop using the IF-conversion
 355   /// transformation.
 356   bool canVectorizeWithIfConvert();
 357
 358   /// Return true if we can vectorize this outer loop. The method performs
 359   /// specific checks for outer loop vectorization.
 360   bool canVectorizeOuterLoop();
 361
 362   /// Return true if all of the instructions in the block can be speculatively
 363   /// executed. \p SafePtrs is a list of addresses that are known to be legal
 364   /// and we know that we can read from them without segfault.
 365   bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
 366
 367   /// Updates the vectorization state by adding \p Phi to the inductions list.
 368   /// This can set \p Phi as the main induction of the loop if \p Phi is a
 369   /// better choice for the main induction than the existing one.
 370   void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
 371                        SmallPtrSetImpl<Value *> &AllowedExit);
 372
 373   /// Create an analysis remark that explains why vectorization failed
 374   ///
 375   /// \p RemarkName is the identifier for the remark.  If \p I is passed it is
 376   /// an instruction that prevents vectorization.  Otherwise the loop is used
 377   /// for the location of the remark.  \return the remark object that can be
 378   /// streamed to.
 379   OptimizationRemarkAnalysis
 380   createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const {
 381     return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
 382                                   RemarkName, TheLoop, I);
 383   }
 384
 385   /// If an access has a symbolic strides, this maps the pointer value to
 386   /// the stride symbol.
 387   const ValueToValueMap *getSymbolicStrides() {
 388     // FIXME: Currently, the set of symbolic strides is sometimes queried before
 389     // it's collected.  This happens from canVectorizeWithIfConvert, when the
 390     // pointer is checked to reference consecutive elements suitable for a
 391     // masked access.
 392     return LAI ? &LAI->getSymbolicStrides() : nullptr;
 393   }
 394
 395   /// The loop that we evaluate.
 396   Loop *TheLoop;
 397
 398   /// Loop Info analysis.
 399   LoopInfo *LI;
 400
 401   /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
 402   /// Applies dynamic knowledge to simplify SCEV expressions in the context
 403   /// of existing SCEV assumptions. The analysis will also add a minimal set
 404   /// of new predicates if this is required to enable vectorization and
 405   /// unrolling.
 406   PredicatedScalarEvolution &PSE;
 407
 408   /// Target Library Info.
 409   TargetLibraryInfo *TLI;
 410
 411   /// Dominator Tree.
 412   DominatorTree *DT;
 413
 414   // LoopAccess analysis.
 415   std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
 416
 417   // And the loop-accesses info corresponding to this loop.  This pointer is
 418   // null until canVectorizeMemory sets it up.
 419   const LoopAccessInfo *LAI = nullptr;
 420
 421   /// Interface to emit optimization remarks.
 422   OptimizationRemarkEmitter *ORE;
 423
 424   //  ---  vectorization state --- //
 425
 426   /// Holds the primary induction variable. This is the counter of the
 427   /// loop.
 428   PHINode *PrimaryInduction = nullptr;
 429
 430   /// Holds the reduction variables.
 431   ReductionList Reductions;
 432
 433   /// Holds all of the induction variables that we found in the loop.
 434   /// Notice that inductions don't need to start at zero and that induction
 435   /// variables can be pointers.
 436   InductionList Inductions;
 437
 438   /// Holds all the casts that participate in the update chain of the induction
 439   /// variables, and that have been proven to be redundant (possibly under a
 440   /// runtime guard). These casts can be ignored when creating the vectorized
 441   /// loop body.
 442   SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
 443
 444   /// Holds the phi nodes that are first-order recurrences.
 445   RecurrenceSet FirstOrderRecurrences;
 446
 447   /// Holds instructions that need to sink past other instructions to handle
 448   /// first-order recurrences.
 449   DenseMap<Instruction *, Instruction *> SinkAfter;
 450
 451   /// Holds the widest induction type encountered.
 452   Type *WidestIndTy = nullptr;
 453
 454   /// Allowed outside users. This holds the induction and reduction
 455   /// vars which can be accessed from outside the loop.
 456   SmallPtrSet<Value *, 4> AllowedExit;
 457
 458   /// Can we assume the absence of NaNs.
 459   bool HasFunNoNaNAttr = false;
 460
 461   /// Vectorization requirements that will go through late-evaluation.
 462   LoopVectorizationRequirements *Requirements;
 463
 464   /// Used to emit an analysis of any legality issues.
 465   LoopVectorizeHints *Hints;
 466
 467   /// The demanded bits analsyis is used to compute the minimum type size in
 468   /// which a reduction can be computed.
 469   DemandedBits *DB;
 470
 471   /// The assumption cache analysis is used to compute the minimum type size in
 472   /// which a reduction can be computed.
 473   AssumptionCache *AC;
 474
 475   /// While vectorizing these instructions we have to generate a
 476   /// call to the appropriate masked intrinsic
 477   SmallPtrSet<const Instruction *, 8> MaskedOp;
 478 };
 479
 480 } // namespace llvm
 481
 482 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H