Home | History | Annotate | Download | only in Analysis
      1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 /// \file
     10 /// This pass exposes codegen information to IR-level passes. Every
     11 /// transformation that uses codegen information is broken into three parts:
     12 /// 1. The IR-level analysis pass.
     13 /// 2. The IR-level transformation interface which provides the needed
     14 ///    information.
     15 /// 3. Codegen-level implementation which uses target-specific hooks.
     16 ///
     17 /// This file defines #2, which is the interface that IR-level transformations
     18 /// use for querying the codegen.
     19 ///
     20 //===----------------------------------------------------------------------===//
     21 
     22 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
     23 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
     24 
     25 #include "llvm/ADT/Optional.h"
     26 #include "llvm/IR/IntrinsicInst.h"
     27 #include "llvm/IR/Intrinsics.h"
     28 #include "llvm/Pass.h"
     29 #include "llvm/Support/DataTypes.h"
     30 #include <functional>
     31 
     32 namespace llvm {
     33 
     34 class Function;
     35 class GlobalValue;
     36 class Loop;
     37 class PreservedAnalyses;
     38 class Type;
     39 class User;
     40 class Value;
     41 
     42 /// \brief Information about a load/store intrinsic defined by the target.
     43 struct MemIntrinsicInfo {
     44   MemIntrinsicInfo()
     45       : ReadMem(false), WriteMem(false), Vol(false), MatchingId(0),
     46         NumMemRefs(0), PtrVal(nullptr) {}
     47   bool ReadMem;
     48   bool WriteMem;
     49   bool Vol;
     50   // Same Id is set by the target for corresponding load/store intrinsics.
     51   unsigned short MatchingId;
     52   int NumMemRefs;
     53   Value *PtrVal;
     54 };
     55 
     56 /// \brief This pass provides access to the codegen interfaces that are needed
     57 /// for IR-level transformations.
     58 class TargetTransformInfo {
     59 public:
     60   /// \brief Construct a TTI object using a type implementing the \c Concept
     61   /// API below.
     62   ///
     63   /// This is used by targets to construct a TTI wrapping their target-specific
     64   /// implementaion that encodes appropriate costs for their target.
     65   template <typename T> TargetTransformInfo(T Impl);
     66 
     67   /// \brief Construct a baseline TTI object using a minimal implementation of
     68   /// the \c Concept API below.
     69   ///
     70   /// The TTI implementation will reflect the information in the DataLayout
     71   /// provided if non-null.
     72   explicit TargetTransformInfo(const DataLayout *DL);
     73 
     74   // Provide move semantics.
     75   TargetTransformInfo(TargetTransformInfo &&Arg);
     76   TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
     77 
     78   // We need to define the destructor out-of-line to define our sub-classes
     79   // out-of-line.
     80   ~TargetTransformInfo();
     81 
     82   /// \brief Handle the invalidation of this information.
     83   ///
     84   /// When used as a result of \c TargetIRAnalysis this method will be called
     85   /// when the function this was computed for changes. When it returns false,
     86   /// the information is preserved across those changes.
     87   bool invalidate(Function &, const PreservedAnalyses &) {
     88     // FIXME: We should probably in some way ensure that the subtarget
     89     // information for a function hasn't changed.
     90     return false;
     91   }
     92 
     93   /// \name Generic Target Information
     94   /// @{
     95 
     96   /// \brief Underlying constants for 'cost' values in this interface.
     97   ///
     98   /// Many APIs in this interface return a cost. This enum defines the
     99   /// fundamental values that should be used to interpret (and produce) those
    100   /// costs. The costs are returned as an unsigned rather than a member of this
    101   /// enumeration because it is expected that the cost of one IR instruction
    102   /// may have a multiplicative factor to it or otherwise won't fit directly
    103   /// into the enum. Moreover, it is common to sum or average costs which works
    104   /// better as simple integral values. Thus this enum only provides constants.
    105   ///
    106   /// Note that these costs should usually reflect the intersection of code-size
    107   /// cost and execution cost. A free instruction is typically one that folds
    108   /// into another instruction. For example, reg-to-reg moves can often be
    109   /// skipped by renaming the registers in the CPU, but they still are encoded
    110   /// and thus wouldn't be considered 'free' here.
    111   enum TargetCostConstants {
    112     TCC_Free = 0,     ///< Expected to fold away in lowering.
    113     TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    114     TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
    115   };
    116 
    117   /// \brief Estimate the cost of a specific operation when lowered.
    118   ///
    119   /// Note that this is designed to work on an arbitrary synthetic opcode, and
    120   /// thus work for hypothetical queries before an instruction has even been
    121   /// formed. However, this does *not* work for GEPs, and must not be called
    122   /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
    123   /// analyzing a GEP's cost required more information.
    124   ///
    125   /// Typically only the result type is required, and the operand type can be
    126   /// omitted. However, if the opcode is one of the cast instructions, the
    127   /// operand type is required.
    128   ///
    129   /// The returned cost is defined in terms of \c TargetCostConstants, see its
    130   /// comments for a detailed explanation of the cost values.
    131   unsigned getOperationCost(unsigned Opcode, Type *Ty,
    132                             Type *OpTy = nullptr) const;
    133 
    134   /// \brief Estimate the cost of a GEP operation when lowered.
    135   ///
    136   /// The contract for this function is the same as \c getOperationCost except
    137   /// that it supports an interface that provides extra information specific to
    138   /// the GEP operation.
    139   unsigned getGEPCost(const Value *Ptr, ArrayRef<const Value *> Operands) const;
    140 
    141   /// \brief Estimate the cost of a function call when lowered.
    142   ///
    143   /// The contract for this is the same as \c getOperationCost except that it
    144   /// supports an interface that provides extra information specific to call
    145   /// instructions.
    146   ///
    147   /// This is the most basic query for estimating call cost: it only knows the
    148   /// function type and (potentially) the number of arguments at the call site.
    149   /// The latter is only interesting for varargs function types.
    150   unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const;
    151 
    152   /// \brief Estimate the cost of calling a specific function when lowered.
    153   ///
    154   /// This overload adds the ability to reason about the particular function
    155   /// being called in the event it is a library call with special lowering.
    156   unsigned getCallCost(const Function *F, int NumArgs = -1) const;
    157 
    158   /// \brief Estimate the cost of calling a specific function when lowered.
    159   ///
    160   /// This overload allows specifying a set of candidate argument values.
    161   unsigned getCallCost(const Function *F,
    162                        ArrayRef<const Value *> Arguments) const;
    163 
    164   /// \brief Estimate the cost of an intrinsic when lowered.
    165   ///
    166   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
    167   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    168                             ArrayRef<Type *> ParamTys) const;
    169 
    170   /// \brief Estimate the cost of an intrinsic when lowered.
    171   ///
    172   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
    173   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    174                             ArrayRef<const Value *> Arguments) const;
    175 
    176   /// \brief Estimate the cost of a given IR user when lowered.
    177   ///
    178   /// This can estimate the cost of either a ConstantExpr or Instruction when
    179   /// lowered. It has two primary advantages over the \c getOperationCost and
    180   /// \c getGEPCost above, and one significant disadvantage: it can only be
    181   /// used when the IR construct has already been formed.
    182   ///
    183   /// The advantages are that it can inspect the SSA use graph to reason more
    184   /// accurately about the cost. For example, all-constant-GEPs can often be
    185   /// folded into a load or other instruction, but if they are used in some
    186   /// other context they may not be folded. This routine can distinguish such
    187   /// cases.
    188   ///
    189   /// The returned cost is defined in terms of \c TargetCostConstants, see its
    190   /// comments for a detailed explanation of the cost values.
    191   unsigned getUserCost(const User *U) const;
    192 
    193   /// \brief Return true if branch divergence exists.
    194   ///
    195   /// Branch divergence has a significantly negative impact on GPU performance
    196   /// when threads in the same wavefront take different paths due to conditional
    197   /// branches.
    198   bool hasBranchDivergence() const;
    199 
    200   /// \brief Returns whether V is a source of divergence.
    201   ///
    202   /// This function provides the target-dependent information for
    203   /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
    204   /// builds the dependency graph, and then runs the reachability algorithm
    205   /// starting with the sources of divergence.
    206   bool isSourceOfDivergence(const Value *V) const;
    207 
    208   /// \brief Test whether calls to a function lower to actual program function
    209   /// calls.
    210   ///
    211   /// The idea is to test whether the program is likely to require a 'call'
    212   /// instruction or equivalent in order to call the given function.
    213   ///
    214   /// FIXME: It's not clear that this is a good or useful query API. Client's
    215   /// should probably move to simpler cost metrics using the above.
    216   /// Alternatively, we could split the cost interface into distinct code-size
    217   /// and execution-speed costs. This would allow modelling the core of this
    218   /// query more accurately as a call is a single small instruction, but
    219   /// incurs significant execution cost.
    220   bool isLoweredToCall(const Function *F) const;
    221 
    222   /// Parameters that control the generic loop unrolling transformation.
    223   struct UnrollingPreferences {
    224     /// The cost threshold for the unrolled loop, compared to
    225     /// CodeMetrics.NumInsts aggregated over all basic blocks in the loop body.
    226     /// The unrolling factor is set such that the unrolled loop body does not
    227     /// exceed this cost. Set this to UINT_MAX to disable the loop body cost
    228     /// restriction.
    229     unsigned Threshold;
    230     /// If complete unrolling could help other optimizations (e.g. InstSimplify)
    231     /// to remove N% of instructions, then we can go beyond unroll threshold.
    232     /// This value set the minimal percent for allowing that.
    233     unsigned MinPercentOfOptimized;
    234     /// The absolute cost threshold. We won't go beyond this even if complete
    235     /// unrolling could result in optimizing out 90% of instructions.
    236     unsigned AbsoluteThreshold;
    237     /// The cost threshold for the unrolled loop when optimizing for size (set
    238     /// to UINT_MAX to disable).
    239     unsigned OptSizeThreshold;
    240     /// The cost threshold for the unrolled loop, like Threshold, but used
    241     /// for partial/runtime unrolling (set to UINT_MAX to disable).
    242     unsigned PartialThreshold;
    243     /// The cost threshold for the unrolled loop when optimizing for size, like
    244     /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    245     /// UINT_MAX to disable).
    246     unsigned PartialOptSizeThreshold;
    247     /// A forced unrolling factor (the number of concatenated bodies of the
    248     /// original loop in the unrolled loop body). When set to 0, the unrolling
    249     /// transformation will select an unrolling factor based on the current cost
    250     /// threshold and other factors.
    251     unsigned Count;
    252     // Set the maximum unrolling factor. The unrolling factor may be selected
    253     // using the appropriate cost threshold, but may not exceed this number
    254     // (set to UINT_MAX to disable). This does not apply in cases where the
    255     // loop is being fully unrolled.
    256     unsigned MaxCount;
    257     /// Allow partial unrolling (unrolling of loops to expand the size of the
    258     /// loop body, not only to eliminate small constant-trip-count loops).
    259     bool Partial;
    260     /// Allow runtime unrolling (unrolling of loops to expand the size of the
    261     /// loop body even when the number of loop iterations is not known at
    262     /// compile time).
    263     bool Runtime;
    264     /// Allow emitting expensive instructions (such as divisions) when computing
    265     /// the trip count of a loop for runtime unrolling.
    266     bool AllowExpensiveTripCount;
    267   };
    268 
    269   /// \brief Get target-customized preferences for the generic loop unrolling
    270   /// transformation. The caller will initialize UP with the current
    271   /// target-independent defaults.
    272   void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
    273 
    274   /// @}
    275 
    276   /// \name Scalar Target Information
    277   /// @{
    278 
    279   /// \brief Flags indicating the kind of support for population count.
    280   ///
    281   /// Compared to the SW implementation, HW support is supposed to
    282   /// significantly boost the performance when the population is dense, and it
    283   /// may or may not degrade performance if the population is sparse. A HW
    284   /// support is considered as "Fast" if it can outperform, or is on a par
    285   /// with, SW implementation when the population is sparse; otherwise, it is
    286   /// considered as "Slow".
    287   enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
    288 
    289   /// \brief Return true if the specified immediate is legal add immediate, that
    290   /// is the target has add instructions which can add a register with the
    291   /// immediate without having to materialize the immediate into a register.
    292   bool isLegalAddImmediate(int64_t Imm) const;
    293 
    294   /// \brief Return true if the specified immediate is legal icmp immediate,
    295   /// that is the target has icmp instructions which can compare a register
    296   /// against the immediate without having to materialize the immediate into a
    297   /// register.
    298   bool isLegalICmpImmediate(int64_t Imm) const;
    299 
    300   /// \brief Return true if the addressing mode represented by AM is legal for
    301   /// this target, for a load/store of the specified type.
    302   /// The type may be VoidTy, in which case only return true if the addressing
    303   /// mode is legal for a load/store of any legal type.
    304   /// TODO: Handle pre/postinc as well.
    305   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
    306                              bool HasBaseReg, int64_t Scale) const;
    307 
    308   /// \brief Return true if the target works with masked instruction
    309   /// AVX2 allows masks for consecutive load and store for i32 and i64 elements.
    310   /// AVX-512 architecture will also allow masks for non-consecutive memory
    311   /// accesses.
    312   bool isLegalMaskedStore(Type *DataType, int Consecutive) const;
    313   bool isLegalMaskedLoad(Type *DataType, int Consecutive) const;
    314 
    315   /// \brief Return the cost of the scaling factor used in the addressing
    316   /// mode represented by AM for this target, for a load/store
    317   /// of the specified type.
    318   /// If the AM is supported, the return value must be >= 0.
    319   /// If the AM is not supported, it returns a negative value.
    320   /// TODO: Handle pre/postinc as well.
    321   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
    322                            bool HasBaseReg, int64_t Scale) const;
    323 
    324   /// \brief Return true if it's free to truncate a value of type Ty1 to type
    325   /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
    326   /// by referencing its sub-register AX.
    327   bool isTruncateFree(Type *Ty1, Type *Ty2) const;
    328 
    329   /// \brief Return true if it is profitable to hoist instruction in the
    330   /// then/else to before if.
    331   bool isProfitableToHoist(Instruction *I) const;
    332 
    333   /// \brief Return true if this type is legal.
    334   bool isTypeLegal(Type *Ty) const;
    335 
    336   /// \brief Returns the target's jmp_buf alignment in bytes.
    337   unsigned getJumpBufAlignment() const;
    338 
    339   /// \brief Returns the target's jmp_buf size in bytes.
    340   unsigned getJumpBufSize() const;
    341 
    342   /// \brief Return true if switches should be turned into lookup tables for the
    343   /// target.
    344   bool shouldBuildLookupTables() const;
    345 
    346   /// \brief Don't restrict interleaved unrolling to small loops.
    347   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
    348 
    349   /// \brief Return hardware support for population count.
    350   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
    351 
    352   /// \brief Return true if the hardware has a fast square-root instruction.
    353   bool haveFastSqrt(Type *Ty) const;
    354 
    355   /// \brief Return the expected cost of supporting the floating point operation
    356   /// of the specified type.
    357   unsigned getFPOpCost(Type *Ty) const;
    358 
    359   /// \brief Return the expected cost of materializing for the given integer
    360   /// immediate of the specified type.
    361   unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
    362 
    363   /// \brief Return the expected cost of materialization for the given integer
    364   /// immediate of the specified type for a given instruction. The cost can be
    365   /// zero if the immediate can be folded into the specified instruction.
    366   unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    367                          Type *Ty) const;
    368   unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
    369                          Type *Ty) const;
    370   /// @}
    371 
    372   /// \name Vector Target Information
    373   /// @{
    374 
    375   /// \brief The various kinds of shuffle patterns for vector queries.
    376   enum ShuffleKind {
    377     SK_Broadcast,       ///< Broadcast element 0 to all other elements.
    378     SK_Reverse,         ///< Reverse the order of the vector.
    379     SK_Alternate,       ///< Choose alternate elements from vector.
    380     SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
    381     SK_ExtractSubvector ///< ExtractSubvector Index indicates start offset.
    382   };
    383 
    384   /// \brief Additional information about an operand's possible values.
    385   enum OperandValueKind {
    386     OK_AnyValue,               // Operand can have any value.
    387     OK_UniformValue,           // Operand is uniform (splat of a value).
    388     OK_UniformConstantValue,   // Operand is uniform constant.
    389     OK_NonUniformConstantValue // Operand is a non uniform constant value.
    390   };
    391 
    392   /// \brief Additional properties of an operand's values.
    393   enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
    394 
    395   /// \return The number of scalar or vector registers that the target has.
    396   /// If 'Vectors' is true, it returns the number of vector registers. If it is
    397   /// set to false, it returns the number of scalar registers.
    398   unsigned getNumberOfRegisters(bool Vector) const;
    399 
    400   /// \return The width of the largest scalar or vector register type.
    401   unsigned getRegisterBitWidth(bool Vector) const;
    402 
    403   /// \return The maximum interleave factor that any transform should try to
    404   /// perform for this target. This number depends on the level of parallelism
    405   /// and the number of execution units in the CPU.
    406   unsigned getMaxInterleaveFactor() const;
    407 
    408   /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
    409   unsigned
    410   getArithmeticInstrCost(unsigned Opcode, Type *Ty,
    411                          OperandValueKind Opd1Info = OK_AnyValue,
    412                          OperandValueKind Opd2Info = OK_AnyValue,
    413                          OperandValueProperties Opd1PropInfo = OP_None,
    414                          OperandValueProperties Opd2PropInfo = OP_None) const;
    415 
    416   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
    417   /// The index and subtype parameters are used by the subvector insertion and
    418   /// extraction shuffle kinds.
    419   unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
    420                           Type *SubTp = nullptr) const;
    421 
    422   /// \return The expected cost of cast instructions, such as bitcast, trunc,
    423   /// zext, etc.
    424   unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const;
    425 
    426   /// \return The expected cost of control-flow related instructions such as
    427   /// Phi, Ret, Br.
    428   unsigned getCFInstrCost(unsigned Opcode) const;
    429 
    430   /// \returns The expected cost of compare and select instructions.
    431   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
    432                               Type *CondTy = nullptr) const;
    433 
    434   /// \return The expected cost of vector Insert and Extract.
    435   /// Use -1 to indicate that there is no information on the index value.
    436   unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
    437                               unsigned Index = -1) const;
    438 
    439   /// \return The cost of Load and Store instructions.
    440   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    441                            unsigned AddressSpace) const;
    442 
    443   /// \return The cost of masked Load and Store instructions.
    444   unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    445                                  unsigned AddressSpace) const;
    446 
    447   /// \brief Calculate the cost of performing a vector reduction.
    448   ///
    449   /// This is the cost of reducing the vector value of type \p Ty to a scalar
    450   /// value using the operation denoted by \p Opcode. The form of the reduction
    451   /// can either be a pairwise reduction or a reduction that splits the vector
    452   /// at every reduction level.
    453   ///
    454   /// Pairwise:
    455   ///  (v0, v1, v2, v3)
    456   ///  ((v0+v1), (v2, v3), undef, undef)
    457   /// Split:
    458   ///  (v0, v1, v2, v3)
    459   ///  ((v0+v2), (v1+v3), undef, undef)
    460   unsigned getReductionCost(unsigned Opcode, Type *Ty,
    461                             bool IsPairwiseForm) const;
    462 
    463   /// \returns The cost of Intrinsic instructions.
    464   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    465                                  ArrayRef<Type *> Tys) const;
    466 
    467   /// \returns The cost of Call instructions.
    468   unsigned getCallInstrCost(Function *F, Type *RetTy,
    469                             ArrayRef<Type *> Tys) const;
    470 
    471   /// \returns The number of pieces into which the provided type must be
    472   /// split during legalization. Zero is returned when the answer is unknown.
    473   unsigned getNumberOfParts(Type *Tp) const;
    474 
    475   /// \returns The cost of the address computation. For most targets this can be
    476   /// merged into the instruction indexing mode. Some targets might want to
    477   /// distinguish between address computation for memory operations on vector
    478   /// types and scalar types. Such targets should override this function.
    479   /// The 'IsComplex' parameter is a hint that the address computation is likely
    480   /// to involve multiple instructions and as such unlikely to be merged into
    481   /// the address indexing mode.
    482   unsigned getAddressComputationCost(Type *Ty, bool IsComplex = false) const;
    483 
    484   /// \returns The cost, if any, of keeping values of the given types alive
    485   /// over a callsite.
    486   ///
    487   /// Some types may require the use of register classes that do not have
    488   /// any callee-saved registers, so would require a spill and fill.
    489   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
    490 
    491   /// \returns True if the intrinsic is a supported memory intrinsic.  Info
    492   /// will contain additional information - whether the intrinsic may write
    493   /// or read to memory, volatility and the pointer.  Info is undefined
    494   /// if false is returned.
    495   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
    496 
    497   /// \returns A value which is the result of the given memory intrinsic.  New
    498   /// instructions may be created to extract the result from the given intrinsic
    499   /// memory operation.  Returns nullptr if the target cannot create a result
    500   /// from the given intrinsic.
    501   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
    502                                            Type *ExpectedType) const;
    503 
    504   /// @}
    505 
    506 private:
    507   /// \brief The abstract base class used to type erase specific TTI
    508   /// implementations.
    509   class Concept;
    510 
    511   /// \brief The template model for the base class which wraps a concrete
    512   /// implementation in a type erased interface.
    513   template <typename T> class Model;
    514 
    515   std::unique_ptr<Concept> TTIImpl;
    516 };
    517 
    518 class TargetTransformInfo::Concept {
    519 public:
    520   virtual ~Concept() = 0;
    521 
    522   virtual unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
    523   virtual unsigned getGEPCost(const Value *Ptr,
    524                               ArrayRef<const Value *> Operands) = 0;
    525   virtual unsigned getCallCost(FunctionType *FTy, int NumArgs) = 0;
    526   virtual unsigned getCallCost(const Function *F, int NumArgs) = 0;
    527   virtual unsigned getCallCost(const Function *F,
    528                                ArrayRef<const Value *> Arguments) = 0;
    529   virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    530                                     ArrayRef<Type *> ParamTys) = 0;
    531   virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    532                                     ArrayRef<const Value *> Arguments) = 0;
    533   virtual unsigned getUserCost(const User *U) = 0;
    534   virtual bool hasBranchDivergence() = 0;
    535   virtual bool isSourceOfDivergence(const Value *V) = 0;
    536   virtual bool isLoweredToCall(const Function *F) = 0;
    537   virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
    538   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
    539   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
    540   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
    541                                      int64_t BaseOffset, bool HasBaseReg,
    542                                      int64_t Scale) = 0;
    543   virtual bool isLegalMaskedStore(Type *DataType, int Consecutive) = 0;
    544   virtual bool isLegalMaskedLoad(Type *DataType, int Consecutive) = 0;
    545   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
    546                                    int64_t BaseOffset, bool HasBaseReg,
    547                                    int64_t Scale) = 0;
    548   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
    549   virtual bool isProfitableToHoist(Instruction *I) = 0;
    550   virtual bool isTypeLegal(Type *Ty) = 0;
    551   virtual unsigned getJumpBufAlignment() = 0;
    552   virtual unsigned getJumpBufSize() = 0;
    553   virtual bool shouldBuildLookupTables() = 0;
    554   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
    555   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
    556   virtual bool haveFastSqrt(Type *Ty) = 0;
    557   virtual unsigned getFPOpCost(Type *Ty) = 0;
    558   virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) = 0;
    559   virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    560                                  Type *Ty) = 0;
    561   virtual unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx,
    562                                  const APInt &Imm, Type *Ty) = 0;
    563   virtual unsigned getNumberOfRegisters(bool Vector) = 0;
    564   virtual unsigned getRegisterBitWidth(bool Vector) = 0;
    565   virtual unsigned getMaxInterleaveFactor() = 0;
    566   virtual unsigned
    567   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    568                          OperandValueKind Opd2Info,
    569                          OperandValueProperties Opd1PropInfo,
    570                          OperandValueProperties Opd2PropInfo) = 0;
    571   virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
    572                                   Type *SubTp) = 0;
    573   virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
    574   virtual unsigned getCFInstrCost(unsigned Opcode) = 0;
    575   virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
    576                                       Type *CondTy) = 0;
    577   virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
    578                                       unsigned Index) = 0;
    579   virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
    580                                    unsigned Alignment,
    581                                    unsigned AddressSpace) = 0;
    582   virtual unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
    583                                          unsigned Alignment,
    584                                          unsigned AddressSpace) = 0;
    585   virtual unsigned getReductionCost(unsigned Opcode, Type *Ty,
    586                                     bool IsPairwiseForm) = 0;
    587   virtual unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    588                                          ArrayRef<Type *> Tys) = 0;
    589   virtual unsigned getCallInstrCost(Function *F, Type *RetTy,
    590                                     ArrayRef<Type *> Tys) = 0;
    591   virtual unsigned getNumberOfParts(Type *Tp) = 0;
    592   virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) = 0;
    593   virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
    594   virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
    595                                   MemIntrinsicInfo &Info) = 0;
    596   virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
    597                                                    Type *ExpectedType) = 0;
    598 };
    599 
    600 template <typename T>
    601 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
    602   T Impl;
    603 
    604 public:
    605   Model(T Impl) : Impl(std::move(Impl)) {}
    606   ~Model() override {}
    607 
    608   unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
    609     return Impl.getOperationCost(Opcode, Ty, OpTy);
    610   }
    611   unsigned getGEPCost(const Value *Ptr,
    612                       ArrayRef<const Value *> Operands) override {
    613     return Impl.getGEPCost(Ptr, Operands);
    614   }
    615   unsigned getCallCost(FunctionType *FTy, int NumArgs) override {
    616     return Impl.getCallCost(FTy, NumArgs);
    617   }
    618   unsigned getCallCost(const Function *F, int NumArgs) override {
    619     return Impl.getCallCost(F, NumArgs);
    620   }
    621   unsigned getCallCost(const Function *F,
    622                        ArrayRef<const Value *> Arguments) override {
    623     return Impl.getCallCost(F, Arguments);
    624   }
    625   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    626                             ArrayRef<Type *> ParamTys) override {
    627     return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
    628   }
    629   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
    630                             ArrayRef<const Value *> Arguments) override {
    631     return Impl.getIntrinsicCost(IID, RetTy, Arguments);
    632   }
    633   unsigned getUserCost(const User *U) override { return Impl.getUserCost(U); }
    634   bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
    635   bool isSourceOfDivergence(const Value *V) override {
    636     return Impl.isSourceOfDivergence(V);
    637   }
    638   bool isLoweredToCall(const Function *F) override {
    639     return Impl.isLoweredToCall(F);
    640   }
    641   void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override {
    642     return Impl.getUnrollingPreferences(L, UP);
    643   }
    644   bool isLegalAddImmediate(int64_t Imm) override {
    645     return Impl.isLegalAddImmediate(Imm);
    646   }
    647   bool isLegalICmpImmediate(int64_t Imm) override {
    648     return Impl.isLegalICmpImmediate(Imm);
    649   }
    650   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
    651                              bool HasBaseReg, int64_t Scale) override {
    652     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
    653                                       Scale);
    654   }
    655   bool isLegalMaskedStore(Type *DataType, int Consecutive) override {
    656     return Impl.isLegalMaskedStore(DataType, Consecutive);
    657   }
    658   bool isLegalMaskedLoad(Type *DataType, int Consecutive) override {
    659     return Impl.isLegalMaskedLoad(DataType, Consecutive);
    660   }
    661   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
    662                            bool HasBaseReg, int64_t Scale) override {
    663     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale);
    664   }
    665   bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    666     return Impl.isTruncateFree(Ty1, Ty2);
    667   }
    668   bool isProfitableToHoist(Instruction *I) override {
    669     return Impl.isProfitableToHoist(I);
    670   }
    671   bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
    672   unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
    673   unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
    674   bool shouldBuildLookupTables() override {
    675     return Impl.shouldBuildLookupTables();
    676   }
    677   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    678     return Impl.enableAggressiveInterleaving(LoopHasReductions);
    679   }
    680   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    681     return Impl.getPopcntSupport(IntTyWidthInBit);
    682   }
    683   bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
    684 
    685   unsigned getFPOpCost(Type *Ty) override {
    686     return Impl.getFPOpCost(Ty);
    687   }
    688 
    689   unsigned getIntImmCost(const APInt &Imm, Type *Ty) override {
    690     return Impl.getIntImmCost(Imm, Ty);
    691   }
    692   unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
    693                          Type *Ty) override {
    694     return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
    695   }
    696   unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
    697                          Type *Ty) override {
    698     return Impl.getIntImmCost(IID, Idx, Imm, Ty);
    699   }
    700   unsigned getNumberOfRegisters(bool Vector) override {
    701     return Impl.getNumberOfRegisters(Vector);
    702   }
    703   unsigned getRegisterBitWidth(bool Vector) override {
    704     return Impl.getRegisterBitWidth(Vector);
    705   }
    706   unsigned getMaxInterleaveFactor() override {
    707     return Impl.getMaxInterleaveFactor();
    708   }
    709   unsigned
    710   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    711                          OperandValueKind Opd2Info,
    712                          OperandValueProperties Opd1PropInfo,
    713                          OperandValueProperties Opd2PropInfo) override {
    714     return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
    715                                        Opd1PropInfo, Opd2PropInfo);
    716   }
    717   unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
    718                           Type *SubTp) override {
    719     return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
    720   }
    721   unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override {
    722     return Impl.getCastInstrCost(Opcode, Dst, Src);
    723   }
    724   unsigned getCFInstrCost(unsigned Opcode) override {
    725     return Impl.getCFInstrCost(Opcode);
    726   }
    727   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
    728                               Type *CondTy) override {
    729     return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy);
    730   }
    731   unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
    732                               unsigned Index) override {
    733     return Impl.getVectorInstrCost(Opcode, Val, Index);
    734   }
    735   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    736                            unsigned AddressSpace) override {
    737     return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
    738   }
    739   unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    740                                  unsigned AddressSpace) override {
    741     return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
    742   }
    743   unsigned getReductionCost(unsigned Opcode, Type *Ty,
    744                             bool IsPairwiseForm) override {
    745     return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
    746   }
    747   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    748                                  ArrayRef<Type *> Tys) override {
    749     return Impl.getIntrinsicInstrCost(ID, RetTy, Tys);
    750   }
    751   unsigned getCallInstrCost(Function *F, Type *RetTy,
    752                             ArrayRef<Type *> Tys) override {
    753     return Impl.getCallInstrCost(F, RetTy, Tys);
    754   }
    755   unsigned getNumberOfParts(Type *Tp) override {
    756     return Impl.getNumberOfParts(Tp);
    757   }
    758   unsigned getAddressComputationCost(Type *Ty, bool IsComplex) override {
    759     return Impl.getAddressComputationCost(Ty, IsComplex);
    760   }
    761   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    762     return Impl.getCostOfKeepingLiveOverCall(Tys);
    763   }
    764   bool getTgtMemIntrinsic(IntrinsicInst *Inst,
    765                           MemIntrinsicInfo &Info) override {
    766     return Impl.getTgtMemIntrinsic(Inst, Info);
    767   }
    768   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
    769                                            Type *ExpectedType) override {
    770     return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
    771   }
    772 };
    773 
    774 template <typename T>
    775 TargetTransformInfo::TargetTransformInfo(T Impl)
    776     : TTIImpl(new Model<T>(Impl)) {}
    777 
    778 /// \brief Analysis pass providing the \c TargetTransformInfo.
    779 ///
    780 /// The core idea of the TargetIRAnalysis is to expose an interface through
    781 /// which LLVM targets can analyze and provide information about the middle
    782 /// end's target-independent IR. This supports use cases such as target-aware
    783 /// cost modeling of IR constructs.
    784 ///
    785 /// This is a function analysis because much of the cost modeling for targets
    786 /// is done in a subtarget specific way and LLVM supports compiling different
    787 /// functions targeting different subtargets in order to support runtime
    788 /// dispatch according to the observed subtarget.
    789 class TargetIRAnalysis {
    790 public:
    791   typedef TargetTransformInfo Result;
    792 
    793   /// \brief Opaque, unique identifier for this analysis pass.
    794   static void *ID() { return (void *)&PassID; }
    795 
    796   /// \brief Provide access to a name for this pass for debugging purposes.
    797   static StringRef name() { return "TargetIRAnalysis"; }
    798 
    799   /// \brief Default construct a target IR analysis.
    800   ///
    801   /// This will use the module's datalayout to construct a baseline
    802   /// conservative TTI result.
    803   TargetIRAnalysis();
    804 
    805   /// \brief Construct an IR analysis pass around a target-provide callback.
    806   ///
    807   /// The callback will be called with a particular function for which the TTI
    808   /// is needed and must return a TTI object for that function.
    809   TargetIRAnalysis(std::function<Result(Function &)> TTICallback);
    810 
    811   // Value semantics. We spell out the constructors for MSVC.
    812   TargetIRAnalysis(const TargetIRAnalysis &Arg)
    813       : TTICallback(Arg.TTICallback) {}
    814   TargetIRAnalysis(TargetIRAnalysis &&Arg)
    815       : TTICallback(std::move(Arg.TTICallback)) {}
    816   TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    817     TTICallback = RHS.TTICallback;
    818     return *this;
    819   }
    820   TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    821     TTICallback = std::move(RHS.TTICallback);
    822     return *this;
    823   }
    824 
    825   Result run(Function &F);
    826 
    827 private:
    828   static char PassID;
    829 
    830   /// \brief The callback used to produce a result.
    831   ///
    832   /// We use a completely opaque callback so that targets can provide whatever
    833   /// mechanism they desire for constructing the TTI for a given function.
    834   ///
    835   /// FIXME: Should we really use std::function? It's relatively inefficient.
    836   /// It might be possible to arrange for even stateful callbacks to outlive
    837   /// the analysis and thus use a function_ref which would be lighter weight.
    838   /// This may also be less error prone as the callback is likely to reference
    839   /// the external TargetMachine, and that reference needs to never dangle.
    840   std::function<Result(Function &)> TTICallback;
    841 
    842   /// \brief Helper function used as the callback in the default constructor.
    843   static Result getDefaultTTI(Function &F);
    844 };
    845 
    846 /// \brief Wrapper pass for TargetTransformInfo.
    847 ///
    848 /// This pass can be constructed from a TTI object which it stores internally
    849 /// and is queried by passes.
    850 class TargetTransformInfoWrapperPass : public ImmutablePass {
    851   TargetIRAnalysis TIRA;
    852   Optional<TargetTransformInfo> TTI;
    853 
    854   virtual void anchor();
    855 
    856 public:
    857   static char ID;
    858 
    859   /// \brief We must provide a default constructor for the pass but it should
    860   /// never be used.
    861   ///
    862   /// Use the constructor below or call one of the creation routines.
    863   TargetTransformInfoWrapperPass();
    864 
    865   explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
    866 
    867   TargetTransformInfo &getTTI(Function &F);
    868 };
    869 
    870 /// \brief Create an analysis pass wrapper around a TTI object.
    871 ///
    872 /// This analysis pass just holds the TTI instance and makes it available to
    873 /// clients.
    874 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
    875 
    876 } // End llvm namespace
    877 
    878 #endif
    879