1 //===- llvm/Analysis/TargetTransformInfo.h ----------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass exposes codegen information to IR-level passes. Every 11 // transformation that uses codegen information is broken into three parts: 12 // 1. The IR-level analysis pass. 13 // 2. The IR-level transformation interface which provides the needed 14 // information. 15 // 3. Codegen-level implementation which uses target-specific hooks. 16 // 17 // This file defines #2, which is the interface that IR-level transformations 18 // use for querying the codegen. 19 // 20 //===----------------------------------------------------------------------===// 21 22 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H 23 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H 24 25 #include "llvm/IR/Intrinsics.h" 26 #include "llvm/Pass.h" 27 #include "llvm/Support/DataTypes.h" 28 29 namespace llvm { 30 31 class GlobalValue; 32 class Type; 33 class User; 34 class Value; 35 36 /// TargetTransformInfo - This pass provides access to the codegen 37 /// interfaces that are needed for IR-level transformations. 38 class TargetTransformInfo { 39 protected: 40 /// \brief The TTI instance one level down the stack. 41 /// 42 /// This is used to implement the default behavior all of the methods which 43 /// is to delegate up through the stack of TTIs until one can answer the 44 /// query. 45 TargetTransformInfo *PrevTTI; 46 47 /// \brief The top of the stack of TTI analyses available. 48 /// 49 /// This is a convenience routine maintained as TTI analyses become available 50 /// that complements the PrevTTI delegation chain. When one part of an 51 /// analysis pass wants to query another part of the analysis pass it can use 52 /// this to start back at the top of the stack. 53 TargetTransformInfo *TopTTI; 54 55 /// All pass subclasses must in their initializePass routine call 56 /// pushTTIStack with themselves to update the pointers tracking the previous 57 /// TTI instance in the analysis group's stack, and the top of the analysis 58 /// group's stack. 59 void pushTTIStack(Pass *P); 60 61 /// All pass subclasses must in their finalizePass routine call popTTIStack 62 /// to update the pointers tracking the previous TTI instance in the analysis 63 /// group's stack, and the top of the analysis group's stack. 64 void popTTIStack(); 65 66 /// All pass subclasses must call TargetTransformInfo::getAnalysisUsage. 67 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 68 69 public: 70 /// This class is intended to be subclassed by real implementations. 71 virtual ~TargetTransformInfo() = 0; 72 73 /// \name Generic Target Information 74 /// @{ 75 76 /// \brief Underlying constants for 'cost' values in this interface. 77 /// 78 /// Many APIs in this interface return a cost. This enum defines the 79 /// fundamental values that should be used to interpret (and produce) those 80 /// costs. The costs are returned as an unsigned rather than a member of this 81 /// enumeration because it is expected that the cost of one IR instruction 82 /// may have a multiplicative factor to it or otherwise won't fit directly 83 /// into the enum. Moreover, it is common to sum or average costs which works 84 /// better as simple integral values. Thus this enum only provides constants. 85 /// 86 /// Note that these costs should usually reflect the intersection of code-size 87 /// cost and execution cost. A free instruction is typically one that folds 88 /// into another instruction. For example, reg-to-reg moves can often be 89 /// skipped by renaming the registers in the CPU, but they still are encoded 90 /// and thus wouldn't be considered 'free' here. 91 enum TargetCostConstants { 92 TCC_Free = 0, ///< Expected to fold away in lowering. 93 TCC_Basic = 1, ///< The cost of a typical 'add' instruction. 94 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. 95 }; 96 97 /// \brief Estimate the cost of a specific operation when lowered. 98 /// 99 /// Note that this is designed to work on an arbitrary synthetic opcode, and 100 /// thus work for hypothetical queries before an instruction has even been 101 /// formed. However, this does *not* work for GEPs, and must not be called 102 /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as 103 /// analyzing a GEP's cost required more information. 104 /// 105 /// Typically only the result type is required, and the operand type can be 106 /// omitted. However, if the opcode is one of the cast instructions, the 107 /// operand type is required. 108 /// 109 /// The returned cost is defined in terms of \c TargetCostConstants, see its 110 /// comments for a detailed explanation of the cost values. 111 virtual unsigned getOperationCost(unsigned Opcode, Type *Ty, 112 Type *OpTy = 0) const; 113 114 /// \brief Estimate the cost of a GEP operation when lowered. 115 /// 116 /// The contract for this function is the same as \c getOperationCost except 117 /// that it supports an interface that provides extra information specific to 118 /// the GEP operation. 119 virtual unsigned getGEPCost(const Value *Ptr, 120 ArrayRef<const Value *> Operands) const; 121 122 /// \brief Estimate the cost of a function call when lowered. 123 /// 124 /// The contract for this is the same as \c getOperationCost except that it 125 /// supports an interface that provides extra information specific to call 126 /// instructions. 127 /// 128 /// This is the most basic query for estimating call cost: it only knows the 129 /// function type and (potentially) the number of arguments at the call site. 130 /// The latter is only interesting for varargs function types. 131 virtual unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const; 132 133 /// \brief Estimate the cost of calling a specific function when lowered. 134 /// 135 /// This overload adds the ability to reason about the particular function 136 /// being called in the event it is a library call with special lowering. 137 virtual unsigned getCallCost(const Function *F, int NumArgs = -1) const; 138 139 /// \brief Estimate the cost of calling a specific function when lowered. 140 /// 141 /// This overload allows specifying a set of candidate argument values. 142 virtual unsigned getCallCost(const Function *F, 143 ArrayRef<const Value *> Arguments) const; 144 145 /// \brief Estimate the cost of an intrinsic when lowered. 146 /// 147 /// Mirrors the \c getCallCost method but uses an intrinsic identifier. 148 virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 149 ArrayRef<Type *> ParamTys) const; 150 151 /// \brief Estimate the cost of an intrinsic when lowered. 152 /// 153 /// Mirrors the \c getCallCost method but uses an intrinsic identifier. 154 virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 155 ArrayRef<const Value *> Arguments) const; 156 157 /// \brief Estimate the cost of a given IR user when lowered. 158 /// 159 /// This can estimate the cost of either a ConstantExpr or Instruction when 160 /// lowered. It has two primary advantages over the \c getOperationCost and 161 /// \c getGEPCost above, and one significant disadvantage: it can only be 162 /// used when the IR construct has already been formed. 163 /// 164 /// The advantages are that it can inspect the SSA use graph to reason more 165 /// accurately about the cost. For example, all-constant-GEPs can often be 166 /// folded into a load or other instruction, but if they are used in some 167 /// other context they may not be folded. This routine can distinguish such 168 /// cases. 169 /// 170 /// The returned cost is defined in terms of \c TargetCostConstants, see its 171 /// comments for a detailed explanation of the cost values. 172 virtual unsigned getUserCost(const User *U) const; 173 174 /// \brief hasBranchDivergence - Return true if branch divergence exists. 175 /// Branch divergence has a significantly negative impact on GPU performance 176 /// when threads in the same wavefront take different paths due to conditional 177 /// branches. 178 virtual bool hasBranchDivergence() const; 179 180 /// \brief Test whether calls to a function lower to actual program function 181 /// calls. 182 /// 183 /// The idea is to test whether the program is likely to require a 'call' 184 /// instruction or equivalent in order to call the given function. 185 /// 186 /// FIXME: It's not clear that this is a good or useful query API. Client's 187 /// should probably move to simpler cost metrics using the above. 188 /// Alternatively, we could split the cost interface into distinct code-size 189 /// and execution-speed costs. This would allow modelling the core of this 190 /// query more accurately as the a call is a single small instruction, but 191 /// incurs significant execution cost. 192 virtual bool isLoweredToCall(const Function *F) const; 193 194 /// @} 195 196 /// \name Scalar Target Information 197 /// @{ 198 199 /// \brief Flags indicating the kind of support for population count. 200 /// 201 /// Compared to the SW implementation, HW support is supposed to 202 /// significantly boost the performance when the population is dense, and it 203 /// may or may not degrade performance if the population is sparse. A HW 204 /// support is considered as "Fast" if it can outperform, or is on a par 205 /// with, SW implementation when the population is sparse; otherwise, it is 206 /// considered as "Slow". 207 enum PopcntSupportKind { 208 PSK_Software, 209 PSK_SlowHardware, 210 PSK_FastHardware 211 }; 212 213 /// isLegalAddImmediate - Return true if the specified immediate is legal 214 /// add immediate, that is the target has add instructions which can add 215 /// a register with the immediate without having to materialize the 216 /// immediate into a register. 217 virtual bool isLegalAddImmediate(int64_t Imm) const; 218 219 /// isLegalICmpImmediate - Return true if the specified immediate is legal 220 /// icmp immediate, that is the target has icmp instructions which can compare 221 /// a register against the immediate without having to materialize the 222 /// immediate into a register. 223 virtual bool isLegalICmpImmediate(int64_t Imm) const; 224 225 /// isLegalAddressingMode - Return true if the addressing mode represented by 226 /// AM is legal for this target, for a load/store of the specified type. 227 /// The type may be VoidTy, in which case only return true if the addressing 228 /// mode is legal for a load/store of any legal type. 229 /// TODO: Handle pre/postinc as well. 230 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, 231 int64_t BaseOffset, bool HasBaseReg, 232 int64_t Scale) const; 233 234 /// \brief Return the cost of the scaling factor used in the addressing 235 /// mode represented by AM for this target, for a load/store 236 /// of the specified type. 237 /// If the AM is supported, the return value must be >= 0. 238 /// If the AM is not supported, it returns a negative value. 239 /// TODO: Handle pre/postinc as well. 240 virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, 241 int64_t BaseOffset, bool HasBaseReg, 242 int64_t Scale) const; 243 244 /// isTruncateFree - Return true if it's free to truncate a value of 245 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in 246 /// register EAX to i16 by referencing its sub-register AX. 247 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; 248 249 /// Is this type legal. 250 virtual bool isTypeLegal(Type *Ty) const; 251 252 /// getJumpBufAlignment - returns the target's jmp_buf alignment in bytes 253 virtual unsigned getJumpBufAlignment() const; 254 255 /// getJumpBufSize - returns the target's jmp_buf size in bytes. 256 virtual unsigned getJumpBufSize() const; 257 258 /// shouldBuildLookupTables - Return true if switches should be turned into 259 /// lookup tables for the target. 260 virtual bool shouldBuildLookupTables() const; 261 262 /// getPopcntSupport - Return hardware support for population count. 263 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; 264 265 /// getIntImmCost - Return the expected cost of materializing the given 266 /// integer immediate of the specified type. 267 virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; 268 269 /// @} 270 271 /// \name Vector Target Information 272 /// @{ 273 274 /// \brief The various kinds of shuffle patterns for vector queries. 275 enum ShuffleKind { 276 SK_Broadcast, ///< Broadcast element 0 to all other elements. 277 SK_Reverse, ///< Reverse the order of the vector. 278 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. 279 SK_ExtractSubvector ///< ExtractSubvector Index indicates start offset. 280 }; 281 282 /// \brief Additonal information about an operand's possible values. 283 enum OperandValueKind { 284 OK_AnyValue, // Operand can have any value. 285 OK_UniformValue, // Operand is uniform (splat of a value). 286 OK_UniformConstantValue // Operand is uniform constant. 287 }; 288 289 /// \return The number of scalar or vector registers that the target has. 290 /// If 'Vectors' is true, it returns the number of vector registers. If it is 291 /// set to false, it returns the number of scalar registers. 292 virtual unsigned getNumberOfRegisters(bool Vector) const; 293 294 /// \return The width of the largest scalar or vector register type. 295 virtual unsigned getRegisterBitWidth(bool Vector) const; 296 297 /// \return The maximum unroll factor that the vectorizer should try to 298 /// perform for this target. This number depends on the level of parallelism 299 /// and the number of execution units in the CPU. 300 virtual unsigned getMaximumUnrollFactor() const; 301 302 /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc. 303 virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, 304 OperandValueKind Opd1Info = OK_AnyValue, 305 OperandValueKind Opd2Info = OK_AnyValue) const; 306 307 /// \return The cost of a shuffle instruction of kind Kind and of type Tp. 308 /// The index and subtype parameters are used by the subvector insertion and 309 /// extraction shuffle kinds. 310 virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, 311 Type *SubTp = 0) const; 312 313 /// \return The expected cost of cast instructions, such as bitcast, trunc, 314 /// zext, etc. 315 virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, 316 Type *Src) const; 317 318 /// \return The expected cost of control-flow related instructions such as 319 /// Phi, Ret, Br. 320 virtual unsigned getCFInstrCost(unsigned Opcode) const; 321 322 /// \returns The expected cost of compare and select instructions. 323 virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 324 Type *CondTy = 0) const; 325 326 /// \return The expected cost of vector Insert and Extract. 327 /// Use -1 to indicate that there is no information on the index value. 328 virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, 329 unsigned Index = -1) const; 330 331 /// \return The cost of Load and Store instructions. 332 virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, 333 unsigned Alignment, 334 unsigned AddressSpace) const; 335 336 /// \returns The cost of Intrinsic instructions. 337 virtual unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, 338 ArrayRef<Type *> Tys) const; 339 340 /// \returns The number of pieces into which the provided type must be 341 /// split during legalization. Zero is returned when the answer is unknown. 342 virtual unsigned getNumberOfParts(Type *Tp) const; 343 344 /// \returns The cost of the address computation. For most targets this can be 345 /// merged into the instruction indexing mode. Some targets might want to 346 /// distinguish between address computation for memory operations on vector 347 /// types and scalar types. Such targets should override this function. 348 /// The 'IsComplex' parameter is a hint that the address computation is likely 349 /// to involve multiple instructions and as such unlikely to be merged into 350 /// the address indexing mode. 351 virtual unsigned getAddressComputationCost(Type *Ty, 352 bool IsComplex = false) const; 353 354 /// @} 355 356 /// Analysis group identification. 357 static char ID; 358 }; 359 360 /// \brief Create the base case instance of a pass in the TTI analysis group. 361 /// 362 /// This class provides the base case for the stack of TTI analyzes. It doesn't 363 /// delegate to anything and uses the STTI and VTTI objects passed in to 364 /// satisfy the queries. 365 ImmutablePass *createNoTargetTransformInfoPass(); 366 367 } // End llvm namespace 368 369 #endif 370