1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// This file defines the LoopVectorizationLegality class. Original code
12 /// in Loop Vectorizer has been moved out to its own file for modularity
15 /// Currently, it works for innermost loop vectorization. Extending this to
16 /// outer loop vectorization is a TODO item.
19 /// 1) LoopVectorizeHints class which keeps a number of loop annotations
20 /// locally for easy look up. It has the ability to write them back as
21 /// loop metadata, upon request.
22 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
23 /// of reporting useful failure to vectorize message.
25 //===----------------------------------------------------------------------===//
27 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
28 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
30 #include "llvm/ADT/MapVector.h"
31 #include "llvm/Analysis/LoopAccessAnalysis.h"
32 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
33 #include "llvm/Transforms/Utils/LoopUtils.h"
37 /// Create an analysis remark that explains why vectorization failed
39 /// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p
40 /// RemarkName is the identifier for the remark. If \p I is passed it is an
41 /// instruction that prevents vectorization. Otherwise \p TheLoop is used for
42 /// the location of the remark. \return the remark object that can be
44 OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
47 Instruction *I = nullptr);
49 /// Utility class for getting and setting loop vectorizer hints in the form
51 /// This class keeps a number of loop annotations locally (as member variables)
52 /// and can, upon request, write them back as metadata on the loop. It will
53 /// initially scan the loop for existing metadata, and will update the local
54 /// values based on information in the loop.
55 /// We cannot write all values to metadata, as the mere presence of some info,
56 /// for example 'force', means a decision has been made. So, we need to be
57 /// careful NOT to add them if the user hasn't specifically asked so.
58 class LoopVectorizeHints {
59 enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED };
61 /// Hint - associates name and validation with the hint value.
64 unsigned Value; // This may have to change for non-numeric values.
67 Hint(const char *Name, unsigned Value, HintKind Kind)
68 : Name(Name), Value(Value), Kind(Kind) {}
70 bool validate(unsigned Val);
73 /// Vectorization width.
76 /// Vectorization interleave factor.
79 /// Vectorization forced
82 /// Already Vectorized
85 /// Return the loop metadata prefix.
86 static StringRef Prefix() { return "llvm.loop."; }
88 /// True if there is any unsafe math in the loop.
89 bool PotentiallyUnsafe = false;
93 FK_Undefined = -1, ///< Not selected.
94 FK_Disabled = 0, ///< Forcing disabled.
95 FK_Enabled = 1, ///< Forcing enabled.
98 LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
99 OptimizationRemarkEmitter &ORE);
101 /// Mark the loop L as already vectorized by setting the width to 1.
102 void setAlreadyVectorized() {
103 IsVectorized.Value = 1;
104 Hint Hints[] = {IsVectorized};
105 writeHintsToMetadata(Hints);
108 bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const;
110 /// Dumps all the hint information.
111 void emitRemarkWithHints() const;
113 unsigned getWidth() const { return Width.Value; }
114 unsigned getInterleave() const { return Interleave.Value; }
115 unsigned getIsVectorized() const { return IsVectorized.Value; }
116 enum ForceKind getForce() const { return (ForceKind)Force.Value; }
118 /// If hints are provided that force vectorization, use the AlwaysPrint
119 /// pass name to force the frontend to print the diagnostic.
120 const char *vectorizeAnalysisPassName() const;
122 bool allowReordering() const {
123 // When enabling loop hints are provided we allow the vectorizer to change
124 // the order of operations that is given by the scalar loop. This is not
125 // enabled by default because can be unsafe or inefficient. For example,
126 // reordering floating-point operations will change the way round-off
127 // error accumulates in the loop.
128 return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
131 bool isPotentiallyUnsafe() const {
132 // Avoid FP vectorization if the target is unsure about proper support.
133 // This may be related to the SIMD unit in the target not handling
134 // IEEE 754 FP ops properly, or bad single-to-double promotions.
135 // Otherwise, a sequence of vectorized loops, even without reduction,
136 // could lead to different end results on the destination vectors.
137 return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
140 void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
143 /// Find hints specified in the loop metadata and update local values.
144 void getHintsFromMetadata();
146 /// Checks string hint with one operand and set value if valid.
147 void setHint(StringRef Name, Metadata *Arg);
149 /// Create a new hint from name / value pair.
150 MDNode *createHintMetadata(StringRef Name, unsigned V) const;
152 /// Matches metadata with hint name.
153 bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes);
155 /// Sets current hints into loop metadata, keeping other values intact.
156 void writeHintsToMetadata(ArrayRef<Hint> HintTypes);
158 /// The loop these hints belong to.
161 /// Interface to emit optimization remarks.
162 OptimizationRemarkEmitter &ORE;
165 /// This holds vectorization requirements that must be verified late in
166 /// the process. The requirements are set by legalize and costmodel. Once
167 /// vectorization has been determined to be possible and profitable the
168 /// requirements can be verified by looking for metadata or compiler options.
169 /// For example, some loops require FP commutativity which is only allowed if
170 /// vectorization is explicitly specified or if the fast-math compiler option
171 /// has been provided.
172 /// Late evaluation of these requirements allows helpful diagnostics to be
173 /// composed that tells the user what need to be done to vectorize the loop. For
174 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
175 /// evaluation should be used only when diagnostics can generated that can be
176 /// followed by a non-expert user.
177 class LoopVectorizationRequirements {
179 LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {}
181 void addUnsafeAlgebraInst(Instruction *I) {
182 // First unsafe algebra instruction.
183 if (!UnsafeAlgebraInst)
184 UnsafeAlgebraInst = I;
187 void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
189 bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
192 unsigned NumRuntimePointerChecks = 0;
193 Instruction *UnsafeAlgebraInst = nullptr;
195 /// Interface to emit optimization remarks.
196 OptimizationRemarkEmitter &ORE;
199 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
200 /// to what vectorization factor.
201 /// This class does not look at the profitability of vectorization, only the
202 /// legality. This class has two main kinds of checks:
203 /// * Memory checks - The code in canVectorizeMemory checks if vectorization
204 /// will change the order of memory accesses in a way that will change the
205 /// correctness of the program.
206 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
207 /// checks for a number of different conditions, such as the availability of a
208 /// single induction variable, that all types are supported and vectorize-able,
209 /// etc. This code reflects the capabilities of InnerLoopVectorizer.
210 /// This class is also used by InnerLoopVectorizer for identifying
211 /// induction variable and the different reduction variables.
212 class LoopVectorizationLegality {
214 LoopVectorizationLegality(
215 Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
216 TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F,
217 std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI,
218 OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R,
219 LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
220 : TheLoop(L), LI(LI), PSE(PSE), TLI(TLI), DT(DT), GetLAA(GetLAA),
221 ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
223 /// ReductionList contains the reduction descriptors for all
224 /// of the reductions that were found in the loop.
225 using ReductionList = DenseMap<PHINode *, RecurrenceDescriptor>;
227 /// InductionList saves induction variables and maps them to the
228 /// induction descriptor.
229 using InductionList = MapVector<PHINode *, InductionDescriptor>;
231 /// RecurrenceSet contains the phi nodes that are recurrences other than
232 /// inductions and reductions.
233 using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
235 /// Returns true if it is legal to vectorize this loop.
236 /// This does not mean that it is profitable to vectorize this
237 /// loop, only that it is legal to do so.
238 /// Temporarily taking UseVPlanNativePath parameter. If true, take
239 /// the new code path being implemented for outer loop vectorization
240 /// (should be functional for inner loop vectorization) based on VPlan.
241 /// If false, good old LV code.
242 bool canVectorize(bool UseVPlanNativePath);
244 /// Returns the primary induction variable.
245 PHINode *getPrimaryInduction() { return PrimaryInduction; }
247 /// Returns the reduction variables found in the loop.
248 ReductionList *getReductionVars() { return &Reductions; }
250 /// Returns the induction variables found in the loop.
251 InductionList *getInductionVars() { return &Inductions; }
253 /// Return the first-order recurrences found in the loop.
254 RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
256 /// Return the set of instructions to sink to handle first-order recurrences.
257 DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
259 /// Returns the widest induction type.
260 Type *getWidestInductionType() { return WidestIndTy; }
262 /// Returns True if V is a Phi node of an induction variable in this loop.
263 bool isInductionPhi(const Value *V);
265 /// Returns True if V is a cast that is part of an induction def-use chain,
266 /// and had been proven to be redundant under a runtime guard (in other
267 /// words, the cast has the same SCEV expression as the induction phi).
268 bool isCastedInductionVariable(const Value *V);
270 /// Returns True if V can be considered as an induction variable in this
271 /// loop. V can be the induction phi, or some redundant cast in the def-use
272 /// chain of the inducion phi.
273 bool isInductionVariable(const Value *V);
275 /// Returns True if PN is a reduction variable in this loop.
276 bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
278 /// Returns True if Phi is a first-order recurrence in this loop.
279 bool isFirstOrderRecurrence(const PHINode *Phi);
281 /// Return true if the block BB needs to be predicated in order for the loop
282 /// to be vectorized.
283 bool blockNeedsPredication(BasicBlock *BB);
285 /// Check if this pointer is consecutive when vectorizing. This happens
286 /// when the last index of the GEP is the induction variable, or that the
287 /// pointer itself is an induction variable.
288 /// This check allows us to vectorize A[idx] into a wide load/store.
290 /// 0 - Stride is unknown or non-consecutive.
291 /// 1 - Address is consecutive.
292 /// -1 - Address is consecutive, and decreasing.
293 /// NOTE: This method must only be used before modifying the original scalar
294 /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
295 int isConsecutivePtr(Value *Ptr);
297 /// Returns true if the value V is uniform within the loop.
298 bool isUniform(Value *V);
300 /// Returns the information that we collected about runtime memory check.
301 const RuntimePointerChecking *getRuntimePointerChecking() const {
302 return LAI->getRuntimePointerChecking();
305 const LoopAccessInfo *getLAI() const { return LAI; }
307 unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
309 uint64_t getMaxSafeRegisterWidth() const {
310 return LAI->getDepChecker().getMaxSafeRegisterWidth();
313 bool hasStride(Value *V) { return LAI->hasStride(V); }
315 /// Returns true if vector representation of the instruction \p I
317 bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
319 unsigned getNumStores() const { return LAI->getNumStores(); }
320 unsigned getNumLoads() const { return LAI->getNumLoads(); }
322 // Returns true if the NoNaN attribute is set on the function.
323 bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
326 /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
327 /// its nested loops are considered legal for vectorization. These legal
328 /// checks are common for inner and outer loop vectorization.
329 /// Temporarily taking UseVPlanNativePath parameter. If true, take
330 /// the new code path being implemented for outer loop vectorization
331 /// (should be functional for inner loop vectorization) based on VPlan.
332 /// If false, good old LV code.
333 bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
335 /// Return true if the pre-header, exiting and latch blocks of \p Lp
336 /// (non-recursive) are considered legal for vectorization.
337 /// Temporarily taking UseVPlanNativePath parameter. If true, take
338 /// the new code path being implemented for outer loop vectorization
339 /// (should be functional for inner loop vectorization) based on VPlan.
340 /// If false, good old LV code.
341 bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
343 /// Check if a single basic block loop is vectorizable.
344 /// At this point we know that this is a loop with a constant trip count
345 /// and we only need to check individual instructions.
346 bool canVectorizeInstrs();
348 /// When we vectorize loops we may change the order in which
349 /// we read and write from memory. This method checks if it is
350 /// legal to vectorize the code, considering only memory constrains.
351 /// Returns true if the loop is vectorizable
352 bool canVectorizeMemory();
354 /// Return true if we can vectorize this loop using the IF-conversion
356 bool canVectorizeWithIfConvert();
358 /// Return true if we can vectorize this outer loop. The method performs
359 /// specific checks for outer loop vectorization.
360 bool canVectorizeOuterLoop();
362 /// Return true if all of the instructions in the block can be speculatively
363 /// executed. \p SafePtrs is a list of addresses that are known to be legal
364 /// and we know that we can read from them without segfault.
365 bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
367 /// Updates the vectorization state by adding \p Phi to the inductions list.
368 /// This can set \p Phi as the main induction of the loop if \p Phi is a
369 /// better choice for the main induction than the existing one.
370 void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
371 SmallPtrSetImpl<Value *> &AllowedExit);
373 /// Create an analysis remark that explains why vectorization failed
375 /// \p RemarkName is the identifier for the remark. If \p I is passed it is
376 /// an instruction that prevents vectorization. Otherwise the loop is used
377 /// for the location of the remark. \return the remark object that can be
379 OptimizationRemarkAnalysis
380 createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const {
381 return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
382 RemarkName, TheLoop, I);
385 /// If an access has a symbolic strides, this maps the pointer value to
386 /// the stride symbol.
387 const ValueToValueMap *getSymbolicStrides() {
388 // FIXME: Currently, the set of symbolic strides is sometimes queried before
389 // it's collected. This happens from canVectorizeWithIfConvert, when the
390 // pointer is checked to reference consecutive elements suitable for a
392 return LAI ? &LAI->getSymbolicStrides() : nullptr;
395 /// The loop that we evaluate.
398 /// Loop Info analysis.
401 /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
402 /// Applies dynamic knowledge to simplify SCEV expressions in the context
403 /// of existing SCEV assumptions. The analysis will also add a minimal set
404 /// of new predicates if this is required to enable vectorization and
406 PredicatedScalarEvolution &PSE;
408 /// Target Library Info.
409 TargetLibraryInfo *TLI;
414 // LoopAccess analysis.
415 std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
417 // And the loop-accesses info corresponding to this loop. This pointer is
418 // null until canVectorizeMemory sets it up.
419 const LoopAccessInfo *LAI = nullptr;
421 /// Interface to emit optimization remarks.
422 OptimizationRemarkEmitter *ORE;
424 // --- vectorization state --- //
426 /// Holds the primary induction variable. This is the counter of the
428 PHINode *PrimaryInduction = nullptr;
430 /// Holds the reduction variables.
431 ReductionList Reductions;
433 /// Holds all of the induction variables that we found in the loop.
434 /// Notice that inductions don't need to start at zero and that induction
435 /// variables can be pointers.
436 InductionList Inductions;
438 /// Holds all the casts that participate in the update chain of the induction
439 /// variables, and that have been proven to be redundant (possibly under a
440 /// runtime guard). These casts can be ignored when creating the vectorized
442 SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
444 /// Holds the phi nodes that are first-order recurrences.
445 RecurrenceSet FirstOrderRecurrences;
447 /// Holds instructions that need to sink past other instructions to handle
448 /// first-order recurrences.
449 DenseMap<Instruction *, Instruction *> SinkAfter;
451 /// Holds the widest induction type encountered.
452 Type *WidestIndTy = nullptr;
454 /// Allowed outside users. This holds the induction and reduction
455 /// vars which can be accessed from outside the loop.
456 SmallPtrSet<Value *, 4> AllowedExit;
458 /// Can we assume the absence of NaNs.
459 bool HasFunNoNaNAttr = false;
461 /// Vectorization requirements that will go through late-evaluation.
462 LoopVectorizationRequirements *Requirements;
464 /// Used to emit an analysis of any legality issues.
465 LoopVectorizeHints *Hints;
467 /// The demanded bits analsyis is used to compute the minimum type size in
468 /// which a reduction can be computed.
471 /// The assumption cache analysis is used to compute the minimum type size in
472 /// which a reduction can be computed.
475 /// While vectorizing these instructions we have to generate a
476 /// call to the appropriate masked intrinsic
477 SmallPtrSet<const Instruction *, 8> MaskedOp;
482 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H