Home | History | Annotate | Download | only in Vectorize
      1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// This file defines the LoopVectorizationLegality class. Original code
     12 /// in Loop Vectorizer has been moved out to its own file for modularity
     13 /// and reusability.
     14 ///
     15 /// Currently, it works for innermost loop vectorization. Extending this to
     16 /// outer loop vectorization is a TODO item.
     17 ///
     18 /// Also provides:
     19 /// 1) LoopVectorizeHints class which keeps a number of loop annotations
     20 /// locally for easy look up. It has the ability to write them back as
     21 /// loop metadata, upon request.
     22 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
     23 /// of reporting useful failure to vectorize message.
     24 //
     25 //===----------------------------------------------------------------------===//
     26 
     27 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
     28 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
     29 
     30 #include "llvm/ADT/MapVector.h"
     31 #include "llvm/Analysis/LoopAccessAnalysis.h"
     32 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
     33 #include "llvm/Transforms/Utils/LoopUtils.h"
     34 
     35 namespace llvm {
     36 
     37 /// Create an analysis remark that explains why vectorization failed
     38 ///
     39 /// \p PassName is the name of the pass (e.g. can be AlwaysPrint).  \p
     40 /// RemarkName is the identifier for the remark.  If \p I is passed it is an
     41 /// instruction that prevents vectorization.  Otherwise \p TheLoop is used for
     42 /// the location of the remark.  \return the remark object that can be
     43 /// streamed to.
     44 OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
     45                                                   StringRef RemarkName,
     46                                                   Loop *TheLoop,
     47                                                   Instruction *I = nullptr);
     48 
     49 /// Utility class for getting and setting loop vectorizer hints in the form
     50 /// of loop metadata.
     51 /// This class keeps a number of loop annotations locally (as member variables)
     52 /// and can, upon request, write them back as metadata on the loop. It will
     53 /// initially scan the loop for existing metadata, and will update the local
     54 /// values based on information in the loop.
     55 /// We cannot write all values to metadata, as the mere presence of some info,
     56 /// for example 'force', means a decision has been made. So, we need to be
     57 /// careful NOT to add them if the user hasn't specifically asked so.
     58 class LoopVectorizeHints {
     59   enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED };
     60 
     61   /// Hint - associates name and validation with the hint value.
     62   struct Hint {
     63     const char *Name;
     64     unsigned Value; // This may have to change for non-numeric values.
     65     HintKind Kind;
     66 
     67     Hint(const char *Name, unsigned Value, HintKind Kind)
     68         : Name(Name), Value(Value), Kind(Kind) {}
     69 
     70     bool validate(unsigned Val);
     71   };
     72 
     73   /// Vectorization width.
     74   Hint Width;
     75 
     76   /// Vectorization interleave factor.
     77   Hint Interleave;
     78 
     79   /// Vectorization forced
     80   Hint Force;
     81 
     82   /// Already Vectorized
     83   Hint IsVectorized;
     84 
     85   /// Return the loop metadata prefix.
     86   static StringRef Prefix() { return "llvm.loop."; }
     87 
     88   /// True if there is any unsafe math in the loop.
     89   bool PotentiallyUnsafe = false;
     90 
     91 public:
     92   enum ForceKind {
     93     FK_Undefined = -1, ///< Not selected.
     94     FK_Disabled = 0,   ///< Forcing disabled.
     95     FK_Enabled = 1,    ///< Forcing enabled.
     96   };
     97 
     98   LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
     99                      OptimizationRemarkEmitter &ORE);
    100 
    101   /// Mark the loop L as already vectorized by setting the width to 1.
    102   void setAlreadyVectorized() {
    103     IsVectorized.Value = 1;
    104     Hint Hints[] = {IsVectorized};
    105     writeHintsToMetadata(Hints);
    106   }
    107 
    108   bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const;
    109 
    110   /// Dumps all the hint information.
    111   void emitRemarkWithHints() const;
    112 
    113   unsigned getWidth() const { return Width.Value; }
    114   unsigned getInterleave() const { return Interleave.Value; }
    115   unsigned getIsVectorized() const { return IsVectorized.Value; }
    116   enum ForceKind getForce() const { return (ForceKind)Force.Value; }
    117 
    118   /// If hints are provided that force vectorization, use the AlwaysPrint
    119   /// pass name to force the frontend to print the diagnostic.
    120   const char *vectorizeAnalysisPassName() const;
    121 
    122   bool allowReordering() const {
    123     // When enabling loop hints are provided we allow the vectorizer to change
    124     // the order of operations that is given by the scalar loop. This is not
    125     // enabled by default because can be unsafe or inefficient. For example,
    126     // reordering floating-point operations will change the way round-off
    127     // error accumulates in the loop.
    128     return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
    129   }
    130 
    131   bool isPotentiallyUnsafe() const {
    132     // Avoid FP vectorization if the target is unsure about proper support.
    133     // This may be related to the SIMD unit in the target not handling
    134     // IEEE 754 FP ops properly, or bad single-to-double promotions.
    135     // Otherwise, a sequence of vectorized loops, even without reduction,
    136     // could lead to different end results on the destination vectors.
    137     return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
    138   }
    139 
    140   void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
    141 
    142 private:
    143   /// Find hints specified in the loop metadata and update local values.
    144   void getHintsFromMetadata();
    145 
    146   /// Checks string hint with one operand and set value if valid.
    147   void setHint(StringRef Name, Metadata *Arg);
    148 
    149   /// Create a new hint from name / value pair.
    150   MDNode *createHintMetadata(StringRef Name, unsigned V) const;
    151 
    152   /// Matches metadata with hint name.
    153   bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes);
    154 
    155   /// Sets current hints into loop metadata, keeping other values intact.
    156   void writeHintsToMetadata(ArrayRef<Hint> HintTypes);
    157 
    158   /// The loop these hints belong to.
    159   const Loop *TheLoop;
    160 
    161   /// Interface to emit optimization remarks.
    162   OptimizationRemarkEmitter &ORE;
    163 };
    164 
    165 /// This holds vectorization requirements that must be verified late in
    166 /// the process. The requirements are set by legalize and costmodel. Once
    167 /// vectorization has been determined to be possible and profitable the
    168 /// requirements can be verified by looking for metadata or compiler options.
    169 /// For example, some loops require FP commutativity which is only allowed if
    170 /// vectorization is explicitly specified or if the fast-math compiler option
    171 /// has been provided.
    172 /// Late evaluation of these requirements allows helpful diagnostics to be
    173 /// composed that tells the user what need to be done to vectorize the loop. For
    174 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
    175 /// evaluation should be used only when diagnostics can generated that can be
    176 /// followed by a non-expert user.
    177 class LoopVectorizationRequirements {
    178 public:
    179   LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {}
    180 
    181   void addUnsafeAlgebraInst(Instruction *I) {
    182     // First unsafe algebra instruction.
    183     if (!UnsafeAlgebraInst)
    184       UnsafeAlgebraInst = I;
    185   }
    186 
    187   void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
    188 
    189   bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
    190 
    191 private:
    192   unsigned NumRuntimePointerChecks = 0;
    193   Instruction *UnsafeAlgebraInst = nullptr;
    194 
    195   /// Interface to emit optimization remarks.
    196   OptimizationRemarkEmitter &ORE;
    197 };
    198 
    199 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
    200 /// to what vectorization factor.
    201 /// This class does not look at the profitability of vectorization, only the
    202 /// legality. This class has two main kinds of checks:
    203 /// * Memory checks - The code in canVectorizeMemory checks if vectorization
    204 ///   will change the order of memory accesses in a way that will change the
    205 ///   correctness of the program.
    206 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
    207 /// checks for a number of different conditions, such as the availability of a
    208 /// single induction variable, that all types are supported and vectorize-able,
    209 /// etc. This code reflects the capabilities of InnerLoopVectorizer.
    210 /// This class is also used by InnerLoopVectorizer for identifying
    211 /// induction variable and the different reduction variables.
    212 class LoopVectorizationLegality {
    213 public:
    214   LoopVectorizationLegality(
    215       Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
    216       TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F,
    217       std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI,
    218       OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R,
    219       LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
    220       : TheLoop(L), LI(LI), PSE(PSE), TLI(TLI), DT(DT), GetLAA(GetLAA),
    221         ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
    222 
    223   /// ReductionList contains the reduction descriptors for all
    224   /// of the reductions that were found in the loop.
    225   using ReductionList = DenseMap<PHINode *, RecurrenceDescriptor>;
    226 
    227   /// InductionList saves induction variables and maps them to the
    228   /// induction descriptor.
    229   using InductionList = MapVector<PHINode *, InductionDescriptor>;
    230 
    231   /// RecurrenceSet contains the phi nodes that are recurrences other than
    232   /// inductions and reductions.
    233   using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
    234 
    235   /// Returns true if it is legal to vectorize this loop.
    236   /// This does not mean that it is profitable to vectorize this
    237   /// loop, only that it is legal to do so.
    238   /// Temporarily taking UseVPlanNativePath parameter. If true, take
    239   /// the new code path being implemented for outer loop vectorization
    240   /// (should be functional for inner loop vectorization) based on VPlan.
    241   /// If false, good old LV code.
    242   bool canVectorize(bool UseVPlanNativePath);
    243 
    244   /// Returns the primary induction variable.
    245   PHINode *getPrimaryInduction() { return PrimaryInduction; }
    246 
    247   /// Returns the reduction variables found in the loop.
    248   ReductionList *getReductionVars() { return &Reductions; }
    249 
    250   /// Returns the induction variables found in the loop.
    251   InductionList *getInductionVars() { return &Inductions; }
    252 
    253   /// Return the first-order recurrences found in the loop.
    254   RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
    255 
    256   /// Return the set of instructions to sink to handle first-order recurrences.
    257   DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
    258 
    259   /// Returns the widest induction type.
    260   Type *getWidestInductionType() { return WidestIndTy; }
    261 
    262   /// Returns True if V is a Phi node of an induction variable in this loop.
    263   bool isInductionPhi(const Value *V);
    264 
    265   /// Returns True if V is a cast that is part of an induction def-use chain,
    266   /// and had been proven to be redundant under a runtime guard (in other
    267   /// words, the cast has the same SCEV expression as the induction phi).
    268   bool isCastedInductionVariable(const Value *V);
    269 
    270   /// Returns True if V can be considered as an induction variable in this
    271   /// loop. V can be the induction phi, or some redundant cast in the def-use
    272   /// chain of the inducion phi.
    273   bool isInductionVariable(const Value *V);
    274 
    275   /// Returns True if PN is a reduction variable in this loop.
    276   bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
    277 
    278   /// Returns True if Phi is a first-order recurrence in this loop.
    279   bool isFirstOrderRecurrence(const PHINode *Phi);
    280 
    281   /// Return true if the block BB needs to be predicated in order for the loop
    282   /// to be vectorized.
    283   bool blockNeedsPredication(BasicBlock *BB);
    284 
    285   /// Check if this pointer is consecutive when vectorizing. This happens
    286   /// when the last index of the GEP is the induction variable, or that the
    287   /// pointer itself is an induction variable.
    288   /// This check allows us to vectorize A[idx] into a wide load/store.
    289   /// Returns:
    290   /// 0 - Stride is unknown or non-consecutive.
    291   /// 1 - Address is consecutive.
    292   /// -1 - Address is consecutive, and decreasing.
    293   /// NOTE: This method must only be used before modifying the original scalar
    294   /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
    295   int isConsecutivePtr(Value *Ptr);
    296 
    297   /// Returns true if the value V is uniform within the loop.
    298   bool isUniform(Value *V);
    299 
    300   /// Returns the information that we collected about runtime memory check.
    301   const RuntimePointerChecking *getRuntimePointerChecking() const {
    302     return LAI->getRuntimePointerChecking();
    303   }
    304 
    305   const LoopAccessInfo *getLAI() const { return LAI; }
    306 
    307   unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
    308 
    309   uint64_t getMaxSafeRegisterWidth() const {
    310     return LAI->getDepChecker().getMaxSafeRegisterWidth();
    311   }
    312 
    313   bool hasStride(Value *V) { return LAI->hasStride(V); }
    314 
    315   /// Returns true if vector representation of the instruction \p I
    316   /// requires mask.
    317   bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
    318 
    319   unsigned getNumStores() const { return LAI->getNumStores(); }
    320   unsigned getNumLoads() const { return LAI->getNumLoads(); }
    321 
    322   // Returns true if the NoNaN attribute is set on the function.
    323   bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
    324 
    325 private:
    326   /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
    327   /// its nested loops are considered legal for vectorization. These legal
    328   /// checks are common for inner and outer loop vectorization.
    329   /// Temporarily taking UseVPlanNativePath parameter. If true, take
    330   /// the new code path being implemented for outer loop vectorization
    331   /// (should be functional for inner loop vectorization) based on VPlan.
    332   /// If false, good old LV code.
    333   bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
    334 
    335   /// Return true if the pre-header, exiting and latch blocks of \p Lp
    336   /// (non-recursive) are considered legal for vectorization.
    337   /// Temporarily taking UseVPlanNativePath parameter. If true, take
    338   /// the new code path being implemented for outer loop vectorization
    339   /// (should be functional for inner loop vectorization) based on VPlan.
    340   /// If false, good old LV code.
    341   bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
    342 
    343   /// Check if a single basic block loop is vectorizable.
    344   /// At this point we know that this is a loop with a constant trip count
    345   /// and we only need to check individual instructions.
    346   bool canVectorizeInstrs();
    347 
    348   /// When we vectorize loops we may change the order in which
    349   /// we read and write from memory. This method checks if it is
    350   /// legal to vectorize the code, considering only memory constrains.
    351   /// Returns true if the loop is vectorizable
    352   bool canVectorizeMemory();
    353 
    354   /// Return true if we can vectorize this loop using the IF-conversion
    355   /// transformation.
    356   bool canVectorizeWithIfConvert();
    357 
    358   /// Return true if we can vectorize this outer loop. The method performs
    359   /// specific checks for outer loop vectorization.
    360   bool canVectorizeOuterLoop();
    361 
    362   /// Return true if all of the instructions in the block can be speculatively
    363   /// executed. \p SafePtrs is a list of addresses that are known to be legal
    364   /// and we know that we can read from them without segfault.
    365   bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
    366 
    367   /// Updates the vectorization state by adding \p Phi to the inductions list.
    368   /// This can set \p Phi as the main induction of the loop if \p Phi is a
    369   /// better choice for the main induction than the existing one.
    370   void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
    371                        SmallPtrSetImpl<Value *> &AllowedExit);
    372 
    373   /// Create an analysis remark that explains why vectorization failed
    374   ///
    375   /// \p RemarkName is the identifier for the remark.  If \p I is passed it is
    376   /// an instruction that prevents vectorization.  Otherwise the loop is used
    377   /// for the location of the remark.  \return the remark object that can be
    378   /// streamed to.
    379   OptimizationRemarkAnalysis
    380   createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const {
    381     return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
    382                                   RemarkName, TheLoop, I);
    383   }
    384 
    385   /// If an access has a symbolic strides, this maps the pointer value to
    386   /// the stride symbol.
    387   const ValueToValueMap *getSymbolicStrides() {
    388     // FIXME: Currently, the set of symbolic strides is sometimes queried before
    389     // it's collected.  This happens from canVectorizeWithIfConvert, when the
    390     // pointer is checked to reference consecutive elements suitable for a
    391     // masked access.
    392     return LAI ? &LAI->getSymbolicStrides() : nullptr;
    393   }
    394 
    395   /// The loop that we evaluate.
    396   Loop *TheLoop;
    397 
    398   /// Loop Info analysis.
    399   LoopInfo *LI;
    400 
    401   /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
    402   /// Applies dynamic knowledge to simplify SCEV expressions in the context
    403   /// of existing SCEV assumptions. The analysis will also add a minimal set
    404   /// of new predicates if this is required to enable vectorization and
    405   /// unrolling.
    406   PredicatedScalarEvolution &PSE;
    407 
    408   /// Target Library Info.
    409   TargetLibraryInfo *TLI;
    410 
    411   /// Dominator Tree.
    412   DominatorTree *DT;
    413 
    414   // LoopAccess analysis.
    415   std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
    416 
    417   // And the loop-accesses info corresponding to this loop.  This pointer is
    418   // null until canVectorizeMemory sets it up.
    419   const LoopAccessInfo *LAI = nullptr;
    420 
    421   /// Interface to emit optimization remarks.
    422   OptimizationRemarkEmitter *ORE;
    423 
    424   //  ---  vectorization state --- //
    425 
    426   /// Holds the primary induction variable. This is the counter of the
    427   /// loop.
    428   PHINode *PrimaryInduction = nullptr;
    429 
    430   /// Holds the reduction variables.
    431   ReductionList Reductions;
    432 
    433   /// Holds all of the induction variables that we found in the loop.
    434   /// Notice that inductions don't need to start at zero and that induction
    435   /// variables can be pointers.
    436   InductionList Inductions;
    437 
    438   /// Holds all the casts that participate in the update chain of the induction
    439   /// variables, and that have been proven to be redundant (possibly under a
    440   /// runtime guard). These casts can be ignored when creating the vectorized
    441   /// loop body.
    442   SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
    443 
    444   /// Holds the phi nodes that are first-order recurrences.
    445   RecurrenceSet FirstOrderRecurrences;
    446 
    447   /// Holds instructions that need to sink past other instructions to handle
    448   /// first-order recurrences.
    449   DenseMap<Instruction *, Instruction *> SinkAfter;
    450 
    451   /// Holds the widest induction type encountered.
    452   Type *WidestIndTy = nullptr;
    453 
    454   /// Allowed outside users. This holds the induction and reduction
    455   /// vars which can be accessed from outside the loop.
    456   SmallPtrSet<Value *, 4> AllowedExit;
    457 
    458   /// Can we assume the absence of NaNs.
    459   bool HasFunNoNaNAttr = false;
    460 
    461   /// Vectorization requirements that will go through late-evaluation.
    462   LoopVectorizationRequirements *Requirements;
    463 
    464   /// Used to emit an analysis of any legality issues.
    465   LoopVectorizeHints *Hints;
    466 
    467   /// The demanded bits analsyis is used to compute the minimum type size in
    468   /// which a reduction can be computed.
    469   DemandedBits *DB;
    470 
    471   /// The assumption cache analysis is used to compute the minimum type size in
    472   /// which a reduction can be computed.
    473   AssumptionCache *AC;
    474 
    475   /// While vectorizing these instructions we have to generate a
    476   /// call to the appropriate masked intrinsic
    477   SmallPtrSet<const Instruction *, 8> MaskedOp;
    478 };
    479 
    480 } // namespace llvm
    481 
    482 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
    483