1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// This pass exposes codegen information to IR-level passes. Every 11 /// transformation that uses codegen information is broken into three parts: 12 /// 1. The IR-level analysis pass. 13 /// 2. The IR-level transformation interface which provides the needed 14 /// information. 15 /// 3. Codegen-level implementation which uses target-specific hooks. 16 /// 17 /// This file defines #2, which is the interface that IR-level transformations 18 /// use for querying the codegen. 19 /// 20 //===----------------------------------------------------------------------===// 21 22 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H 23 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H 24 25 #include "llvm/ADT/Optional.h" 26 #include "llvm/IR/IntrinsicInst.h" 27 #include "llvm/IR/Intrinsics.h" 28 #include "llvm/Pass.h" 29 #include "llvm/Support/DataTypes.h" 30 #include <functional> 31 32 namespace llvm { 33 34 class Function; 35 class GlobalValue; 36 class Loop; 37 class PreservedAnalyses; 38 class Type; 39 class User; 40 class Value; 41 42 /// \brief Information about a load/store intrinsic defined by the target. 43 struct MemIntrinsicInfo { 44 MemIntrinsicInfo() 45 : ReadMem(false), WriteMem(false), Vol(false), MatchingId(0), 46 NumMemRefs(0), PtrVal(nullptr) {} 47 bool ReadMem; 48 bool WriteMem; 49 bool Vol; 50 // Same Id is set by the target for corresponding load/store intrinsics. 51 unsigned short MatchingId; 52 int NumMemRefs; 53 Value *PtrVal; 54 }; 55 56 /// \brief This pass provides access to the codegen interfaces that are needed 57 /// for IR-level transformations. 58 class TargetTransformInfo { 59 public: 60 /// \brief Construct a TTI object using a type implementing the \c Concept 61 /// API below. 62 /// 63 /// This is used by targets to construct a TTI wrapping their target-specific 64 /// implementaion that encodes appropriate costs for their target. 65 template <typename T> TargetTransformInfo(T Impl); 66 67 /// \brief Construct a baseline TTI object using a minimal implementation of 68 /// the \c Concept API below. 69 /// 70 /// The TTI implementation will reflect the information in the DataLayout 71 /// provided if non-null. 72 explicit TargetTransformInfo(const DataLayout *DL); 73 74 // Provide move semantics. 75 TargetTransformInfo(TargetTransformInfo &&Arg); 76 TargetTransformInfo &operator=(TargetTransformInfo &&RHS); 77 78 // We need to define the destructor out-of-line to define our sub-classes 79 // out-of-line. 80 ~TargetTransformInfo(); 81 82 /// \brief Handle the invalidation of this information. 83 /// 84 /// When used as a result of \c TargetIRAnalysis this method will be called 85 /// when the function this was computed for changes. When it returns false, 86 /// the information is preserved across those changes. 87 bool invalidate(Function &, const PreservedAnalyses &) { 88 // FIXME: We should probably in some way ensure that the subtarget 89 // information for a function hasn't changed. 90 return false; 91 } 92 93 /// \name Generic Target Information 94 /// @{ 95 96 /// \brief Underlying constants for 'cost' values in this interface. 97 /// 98 /// Many APIs in this interface return a cost. This enum defines the 99 /// fundamental values that should be used to interpret (and produce) those 100 /// costs. The costs are returned as an unsigned rather than a member of this 101 /// enumeration because it is expected that the cost of one IR instruction 102 /// may have a multiplicative factor to it or otherwise won't fit directly 103 /// into the enum. Moreover, it is common to sum or average costs which works 104 /// better as simple integral values. Thus this enum only provides constants. 105 /// 106 /// Note that these costs should usually reflect the intersection of code-size 107 /// cost and execution cost. A free instruction is typically one that folds 108 /// into another instruction. For example, reg-to-reg moves can often be 109 /// skipped by renaming the registers in the CPU, but they still are encoded 110 /// and thus wouldn't be considered 'free' here. 111 enum TargetCostConstants { 112 TCC_Free = 0, ///< Expected to fold away in lowering. 113 TCC_Basic = 1, ///< The cost of a typical 'add' instruction. 114 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. 115 }; 116 117 /// \brief Estimate the cost of a specific operation when lowered. 118 /// 119 /// Note that this is designed to work on an arbitrary synthetic opcode, and 120 /// thus work for hypothetical queries before an instruction has even been 121 /// formed. However, this does *not* work for GEPs, and must not be called 122 /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as 123 /// analyzing a GEP's cost required more information. 124 /// 125 /// Typically only the result type is required, and the operand type can be 126 /// omitted. However, if the opcode is one of the cast instructions, the 127 /// operand type is required. 128 /// 129 /// The returned cost is defined in terms of \c TargetCostConstants, see its 130 /// comments for a detailed explanation of the cost values. 131 unsigned getOperationCost(unsigned Opcode, Type *Ty, 132 Type *OpTy = nullptr) const; 133 134 /// \brief Estimate the cost of a GEP operation when lowered. 135 /// 136 /// The contract for this function is the same as \c getOperationCost except 137 /// that it supports an interface that provides extra information specific to 138 /// the GEP operation. 139 unsigned getGEPCost(const Value *Ptr, ArrayRef<const Value *> Operands) const; 140 141 /// \brief Estimate the cost of a function call when lowered. 142 /// 143 /// The contract for this is the same as \c getOperationCost except that it 144 /// supports an interface that provides extra information specific to call 145 /// instructions. 146 /// 147 /// This is the most basic query for estimating call cost: it only knows the 148 /// function type and (potentially) the number of arguments at the call site. 149 /// The latter is only interesting for varargs function types. 150 unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const; 151 152 /// \brief Estimate the cost of calling a specific function when lowered. 153 /// 154 /// This overload adds the ability to reason about the particular function 155 /// being called in the event it is a library call with special lowering. 156 unsigned getCallCost(const Function *F, int NumArgs = -1) const; 157 158 /// \brief Estimate the cost of calling a specific function when lowered. 159 /// 160 /// This overload allows specifying a set of candidate argument values. 161 unsigned getCallCost(const Function *F, 162 ArrayRef<const Value *> Arguments) const; 163 164 /// \brief Estimate the cost of an intrinsic when lowered. 165 /// 166 /// Mirrors the \c getCallCost method but uses an intrinsic identifier. 167 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 168 ArrayRef<Type *> ParamTys) const; 169 170 /// \brief Estimate the cost of an intrinsic when lowered. 171 /// 172 /// Mirrors the \c getCallCost method but uses an intrinsic identifier. 173 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 174 ArrayRef<const Value *> Arguments) const; 175 176 /// \brief Estimate the cost of a given IR user when lowered. 177 /// 178 /// This can estimate the cost of either a ConstantExpr or Instruction when 179 /// lowered. It has two primary advantages over the \c getOperationCost and 180 /// \c getGEPCost above, and one significant disadvantage: it can only be 181 /// used when the IR construct has already been formed. 182 /// 183 /// The advantages are that it can inspect the SSA use graph to reason more 184 /// accurately about the cost. For example, all-constant-GEPs can often be 185 /// folded into a load or other instruction, but if they are used in some 186 /// other context they may not be folded. This routine can distinguish such 187 /// cases. 188 /// 189 /// The returned cost is defined in terms of \c TargetCostConstants, see its 190 /// comments for a detailed explanation of the cost values. 191 unsigned getUserCost(const User *U) const; 192 193 /// \brief Return true if branch divergence exists. 194 /// 195 /// Branch divergence has a significantly negative impact on GPU performance 196 /// when threads in the same wavefront take different paths due to conditional 197 /// branches. 198 bool hasBranchDivergence() const; 199 200 /// \brief Returns whether V is a source of divergence. 201 /// 202 /// This function provides the target-dependent information for 203 /// the target-independent DivergenceAnalysis. DivergenceAnalysis first 204 /// builds the dependency graph, and then runs the reachability algorithm 205 /// starting with the sources of divergence. 206 bool isSourceOfDivergence(const Value *V) const; 207 208 /// \brief Test whether calls to a function lower to actual program function 209 /// calls. 210 /// 211 /// The idea is to test whether the program is likely to require a 'call' 212 /// instruction or equivalent in order to call the given function. 213 /// 214 /// FIXME: It's not clear that this is a good or useful query API. Client's 215 /// should probably move to simpler cost metrics using the above. 216 /// Alternatively, we could split the cost interface into distinct code-size 217 /// and execution-speed costs. This would allow modelling the core of this 218 /// query more accurately as a call is a single small instruction, but 219 /// incurs significant execution cost. 220 bool isLoweredToCall(const Function *F) const; 221 222 /// Parameters that control the generic loop unrolling transformation. 223 struct UnrollingPreferences { 224 /// The cost threshold for the unrolled loop, compared to 225 /// CodeMetrics.NumInsts aggregated over all basic blocks in the loop body. 226 /// The unrolling factor is set such that the unrolled loop body does not 227 /// exceed this cost. Set this to UINT_MAX to disable the loop body cost 228 /// restriction. 229 unsigned Threshold; 230 /// If complete unrolling could help other optimizations (e.g. InstSimplify) 231 /// to remove N% of instructions, then we can go beyond unroll threshold. 232 /// This value set the minimal percent for allowing that. 233 unsigned MinPercentOfOptimized; 234 /// The absolute cost threshold. We won't go beyond this even if complete 235 /// unrolling could result in optimizing out 90% of instructions. 236 unsigned AbsoluteThreshold; 237 /// The cost threshold for the unrolled loop when optimizing for size (set 238 /// to UINT_MAX to disable). 239 unsigned OptSizeThreshold; 240 /// The cost threshold for the unrolled loop, like Threshold, but used 241 /// for partial/runtime unrolling (set to UINT_MAX to disable). 242 unsigned PartialThreshold; 243 /// The cost threshold for the unrolled loop when optimizing for size, like 244 /// OptSizeThreshold, but used for partial/runtime unrolling (set to 245 /// UINT_MAX to disable). 246 unsigned PartialOptSizeThreshold; 247 /// A forced unrolling factor (the number of concatenated bodies of the 248 /// original loop in the unrolled loop body). When set to 0, the unrolling 249 /// transformation will select an unrolling factor based on the current cost 250 /// threshold and other factors. 251 unsigned Count; 252 // Set the maximum unrolling factor. The unrolling factor may be selected 253 // using the appropriate cost threshold, but may not exceed this number 254 // (set to UINT_MAX to disable). This does not apply in cases where the 255 // loop is being fully unrolled. 256 unsigned MaxCount; 257 /// Allow partial unrolling (unrolling of loops to expand the size of the 258 /// loop body, not only to eliminate small constant-trip-count loops). 259 bool Partial; 260 /// Allow runtime unrolling (unrolling of loops to expand the size of the 261 /// loop body even when the number of loop iterations is not known at 262 /// compile time). 263 bool Runtime; 264 /// Allow emitting expensive instructions (such as divisions) when computing 265 /// the trip count of a loop for runtime unrolling. 266 bool AllowExpensiveTripCount; 267 }; 268 269 /// \brief Get target-customized preferences for the generic loop unrolling 270 /// transformation. The caller will initialize UP with the current 271 /// target-independent defaults. 272 void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; 273 274 /// @} 275 276 /// \name Scalar Target Information 277 /// @{ 278 279 /// \brief Flags indicating the kind of support for population count. 280 /// 281 /// Compared to the SW implementation, HW support is supposed to 282 /// significantly boost the performance when the population is dense, and it 283 /// may or may not degrade performance if the population is sparse. A HW 284 /// support is considered as "Fast" if it can outperform, or is on a par 285 /// with, SW implementation when the population is sparse; otherwise, it is 286 /// considered as "Slow". 287 enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware }; 288 289 /// \brief Return true if the specified immediate is legal add immediate, that 290 /// is the target has add instructions which can add a register with the 291 /// immediate without having to materialize the immediate into a register. 292 bool isLegalAddImmediate(int64_t Imm) const; 293 294 /// \brief Return true if the specified immediate is legal icmp immediate, 295 /// that is the target has icmp instructions which can compare a register 296 /// against the immediate without having to materialize the immediate into a 297 /// register. 298 bool isLegalICmpImmediate(int64_t Imm) const; 299 300 /// \brief Return true if the addressing mode represented by AM is legal for 301 /// this target, for a load/store of the specified type. 302 /// The type may be VoidTy, in which case only return true if the addressing 303 /// mode is legal for a load/store of any legal type. 304 /// TODO: Handle pre/postinc as well. 305 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 306 bool HasBaseReg, int64_t Scale) const; 307 308 /// \brief Return true if the target works with masked instruction 309 /// AVX2 allows masks for consecutive load and store for i32 and i64 elements. 310 /// AVX-512 architecture will also allow masks for non-consecutive memory 311 /// accesses. 312 bool isLegalMaskedStore(Type *DataType, int Consecutive) const; 313 bool isLegalMaskedLoad(Type *DataType, int Consecutive) const; 314 315 /// \brief Return the cost of the scaling factor used in the addressing 316 /// mode represented by AM for this target, for a load/store 317 /// of the specified type. 318 /// If the AM is supported, the return value must be >= 0. 319 /// If the AM is not supported, it returns a negative value. 320 /// TODO: Handle pre/postinc as well. 321 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 322 bool HasBaseReg, int64_t Scale) const; 323 324 /// \brief Return true if it's free to truncate a value of type Ty1 to type 325 /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 326 /// by referencing its sub-register AX. 327 bool isTruncateFree(Type *Ty1, Type *Ty2) const; 328 329 /// \brief Return true if it is profitable to hoist instruction in the 330 /// then/else to before if. 331 bool isProfitableToHoist(Instruction *I) const; 332 333 /// \brief Return true if this type is legal. 334 bool isTypeLegal(Type *Ty) const; 335 336 /// \brief Returns the target's jmp_buf alignment in bytes. 337 unsigned getJumpBufAlignment() const; 338 339 /// \brief Returns the target's jmp_buf size in bytes. 340 unsigned getJumpBufSize() const; 341 342 /// \brief Return true if switches should be turned into lookup tables for the 343 /// target. 344 bool shouldBuildLookupTables() const; 345 346 /// \brief Don't restrict interleaved unrolling to small loops. 347 bool enableAggressiveInterleaving(bool LoopHasReductions) const; 348 349 /// \brief Return hardware support for population count. 350 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; 351 352 /// \brief Return true if the hardware has a fast square-root instruction. 353 bool haveFastSqrt(Type *Ty) const; 354 355 /// \brief Return the expected cost of supporting the floating point operation 356 /// of the specified type. 357 unsigned getFPOpCost(Type *Ty) const; 358 359 /// \brief Return the expected cost of materializing for the given integer 360 /// immediate of the specified type. 361 unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; 362 363 /// \brief Return the expected cost of materialization for the given integer 364 /// immediate of the specified type for a given instruction. The cost can be 365 /// zero if the immediate can be folded into the specified instruction. 366 unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, 367 Type *Ty) const; 368 unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, 369 Type *Ty) const; 370 /// @} 371 372 /// \name Vector Target Information 373 /// @{ 374 375 /// \brief The various kinds of shuffle patterns for vector queries. 376 enum ShuffleKind { 377 SK_Broadcast, ///< Broadcast element 0 to all other elements. 378 SK_Reverse, ///< Reverse the order of the vector. 379 SK_Alternate, ///< Choose alternate elements from vector. 380 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. 381 SK_ExtractSubvector ///< ExtractSubvector Index indicates start offset. 382 }; 383 384 /// \brief Additional information about an operand's possible values. 385 enum OperandValueKind { 386 OK_AnyValue, // Operand can have any value. 387 OK_UniformValue, // Operand is uniform (splat of a value). 388 OK_UniformConstantValue, // Operand is uniform constant. 389 OK_NonUniformConstantValue // Operand is a non uniform constant value. 390 }; 391 392 /// \brief Additional properties of an operand's values. 393 enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 }; 394 395 /// \return The number of scalar or vector registers that the target has. 396 /// If 'Vectors' is true, it returns the number of vector registers. If it is 397 /// set to false, it returns the number of scalar registers. 398 unsigned getNumberOfRegisters(bool Vector) const; 399 400 /// \return The width of the largest scalar or vector register type. 401 unsigned getRegisterBitWidth(bool Vector) const; 402 403 /// \return The maximum interleave factor that any transform should try to 404 /// perform for this target. This number depends on the level of parallelism 405 /// and the number of execution units in the CPU. 406 unsigned getMaxInterleaveFactor() const; 407 408 /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc. 409 unsigned 410 getArithmeticInstrCost(unsigned Opcode, Type *Ty, 411 OperandValueKind Opd1Info = OK_AnyValue, 412 OperandValueKind Opd2Info = OK_AnyValue, 413 OperandValueProperties Opd1PropInfo = OP_None, 414 OperandValueProperties Opd2PropInfo = OP_None) const; 415 416 /// \return The cost of a shuffle instruction of kind Kind and of type Tp. 417 /// The index and subtype parameters are used by the subvector insertion and 418 /// extraction shuffle kinds. 419 unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, 420 Type *SubTp = nullptr) const; 421 422 /// \return The expected cost of cast instructions, such as bitcast, trunc, 423 /// zext, etc. 424 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const; 425 426 /// \return The expected cost of control-flow related instructions such as 427 /// Phi, Ret, Br. 428 unsigned getCFInstrCost(unsigned Opcode) const; 429 430 /// \returns The expected cost of compare and select instructions. 431 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 432 Type *CondTy = nullptr) const; 433 434 /// \return The expected cost of vector Insert and Extract. 435 /// Use -1 to indicate that there is no information on the index value. 436 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, 437 unsigned Index = -1) const; 438 439 /// \return The cost of Load and Store instructions. 440 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 441 unsigned AddressSpace) const; 442 443 /// \return The cost of masked Load and Store instructions. 444 unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 445 unsigned AddressSpace) const; 446 447 /// \brief Calculate the cost of performing a vector reduction. 448 /// 449 /// This is the cost of reducing the vector value of type \p Ty to a scalar 450 /// value using the operation denoted by \p Opcode. The form of the reduction 451 /// can either be a pairwise reduction or a reduction that splits the vector 452 /// at every reduction level. 453 /// 454 /// Pairwise: 455 /// (v0, v1, v2, v3) 456 /// ((v0+v1), (v2, v3), undef, undef) 457 /// Split: 458 /// (v0, v1, v2, v3) 459 /// ((v0+v2), (v1+v3), undef, undef) 460 unsigned getReductionCost(unsigned Opcode, Type *Ty, 461 bool IsPairwiseForm) const; 462 463 /// \returns The cost of Intrinsic instructions. 464 unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, 465 ArrayRef<Type *> Tys) const; 466 467 /// \returns The cost of Call instructions. 468 unsigned getCallInstrCost(Function *F, Type *RetTy, 469 ArrayRef<Type *> Tys) const; 470 471 /// \returns The number of pieces into which the provided type must be 472 /// split during legalization. Zero is returned when the answer is unknown. 473 unsigned getNumberOfParts(Type *Tp) const; 474 475 /// \returns The cost of the address computation. For most targets this can be 476 /// merged into the instruction indexing mode. Some targets might want to 477 /// distinguish between address computation for memory operations on vector 478 /// types and scalar types. Such targets should override this function. 479 /// The 'IsComplex' parameter is a hint that the address computation is likely 480 /// to involve multiple instructions and as such unlikely to be merged into 481 /// the address indexing mode. 482 unsigned getAddressComputationCost(Type *Ty, bool IsComplex = false) const; 483 484 /// \returns The cost, if any, of keeping values of the given types alive 485 /// over a callsite. 486 /// 487 /// Some types may require the use of register classes that do not have 488 /// any callee-saved registers, so would require a spill and fill. 489 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; 490 491 /// \returns True if the intrinsic is a supported memory intrinsic. Info 492 /// will contain additional information - whether the intrinsic may write 493 /// or read to memory, volatility and the pointer. Info is undefined 494 /// if false is returned. 495 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; 496 497 /// \returns A value which is the result of the given memory intrinsic. New 498 /// instructions may be created to extract the result from the given intrinsic 499 /// memory operation. Returns nullptr if the target cannot create a result 500 /// from the given intrinsic. 501 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, 502 Type *ExpectedType) const; 503 504 /// @} 505 506 private: 507 /// \brief The abstract base class used to type erase specific TTI 508 /// implementations. 509 class Concept; 510 511 /// \brief The template model for the base class which wraps a concrete 512 /// implementation in a type erased interface. 513 template <typename T> class Model; 514 515 std::unique_ptr<Concept> TTIImpl; 516 }; 517 518 class TargetTransformInfo::Concept { 519 public: 520 virtual ~Concept() = 0; 521 522 virtual unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0; 523 virtual unsigned getGEPCost(const Value *Ptr, 524 ArrayRef<const Value *> Operands) = 0; 525 virtual unsigned getCallCost(FunctionType *FTy, int NumArgs) = 0; 526 virtual unsigned getCallCost(const Function *F, int NumArgs) = 0; 527 virtual unsigned getCallCost(const Function *F, 528 ArrayRef<const Value *> Arguments) = 0; 529 virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 530 ArrayRef<Type *> ParamTys) = 0; 531 virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 532 ArrayRef<const Value *> Arguments) = 0; 533 virtual unsigned getUserCost(const User *U) = 0; 534 virtual bool hasBranchDivergence() = 0; 535 virtual bool isSourceOfDivergence(const Value *V) = 0; 536 virtual bool isLoweredToCall(const Function *F) = 0; 537 virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0; 538 virtual bool isLegalAddImmediate(int64_t Imm) = 0; 539 virtual bool isLegalICmpImmediate(int64_t Imm) = 0; 540 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, 541 int64_t BaseOffset, bool HasBaseReg, 542 int64_t Scale) = 0; 543 virtual bool isLegalMaskedStore(Type *DataType, int Consecutive) = 0; 544 virtual bool isLegalMaskedLoad(Type *DataType, int Consecutive) = 0; 545 virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, 546 int64_t BaseOffset, bool HasBaseReg, 547 int64_t Scale) = 0; 548 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0; 549 virtual bool isProfitableToHoist(Instruction *I) = 0; 550 virtual bool isTypeLegal(Type *Ty) = 0; 551 virtual unsigned getJumpBufAlignment() = 0; 552 virtual unsigned getJumpBufSize() = 0; 553 virtual bool shouldBuildLookupTables() = 0; 554 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; 555 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; 556 virtual bool haveFastSqrt(Type *Ty) = 0; 557 virtual unsigned getFPOpCost(Type *Ty) = 0; 558 virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) = 0; 559 virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, 560 Type *Ty) = 0; 561 virtual unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, 562 const APInt &Imm, Type *Ty) = 0; 563 virtual unsigned getNumberOfRegisters(bool Vector) = 0; 564 virtual unsigned getRegisterBitWidth(bool Vector) = 0; 565 virtual unsigned getMaxInterleaveFactor() = 0; 566 virtual unsigned 567 getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, 568 OperandValueKind Opd2Info, 569 OperandValueProperties Opd1PropInfo, 570 OperandValueProperties Opd2PropInfo) = 0; 571 virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, 572 Type *SubTp) = 0; 573 virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0; 574 virtual unsigned getCFInstrCost(unsigned Opcode) = 0; 575 virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 576 Type *CondTy) = 0; 577 virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, 578 unsigned Index) = 0; 579 virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, 580 unsigned Alignment, 581 unsigned AddressSpace) = 0; 582 virtual unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, 583 unsigned Alignment, 584 unsigned AddressSpace) = 0; 585 virtual unsigned getReductionCost(unsigned Opcode, Type *Ty, 586 bool IsPairwiseForm) = 0; 587 virtual unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, 588 ArrayRef<Type *> Tys) = 0; 589 virtual unsigned getCallInstrCost(Function *F, Type *RetTy, 590 ArrayRef<Type *> Tys) = 0; 591 virtual unsigned getNumberOfParts(Type *Tp) = 0; 592 virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) = 0; 593 virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0; 594 virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, 595 MemIntrinsicInfo &Info) = 0; 596 virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, 597 Type *ExpectedType) = 0; 598 }; 599 600 template <typename T> 601 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { 602 T Impl; 603 604 public: 605 Model(T Impl) : Impl(std::move(Impl)) {} 606 ~Model() override {} 607 608 unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override { 609 return Impl.getOperationCost(Opcode, Ty, OpTy); 610 } 611 unsigned getGEPCost(const Value *Ptr, 612 ArrayRef<const Value *> Operands) override { 613 return Impl.getGEPCost(Ptr, Operands); 614 } 615 unsigned getCallCost(FunctionType *FTy, int NumArgs) override { 616 return Impl.getCallCost(FTy, NumArgs); 617 } 618 unsigned getCallCost(const Function *F, int NumArgs) override { 619 return Impl.getCallCost(F, NumArgs); 620 } 621 unsigned getCallCost(const Function *F, 622 ArrayRef<const Value *> Arguments) override { 623 return Impl.getCallCost(F, Arguments); 624 } 625 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 626 ArrayRef<Type *> ParamTys) override { 627 return Impl.getIntrinsicCost(IID, RetTy, ParamTys); 628 } 629 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 630 ArrayRef<const Value *> Arguments) override { 631 return Impl.getIntrinsicCost(IID, RetTy, Arguments); 632 } 633 unsigned getUserCost(const User *U) override { return Impl.getUserCost(U); } 634 bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } 635 bool isSourceOfDivergence(const Value *V) override { 636 return Impl.isSourceOfDivergence(V); 637 } 638 bool isLoweredToCall(const Function *F) override { 639 return Impl.isLoweredToCall(F); 640 } 641 void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override { 642 return Impl.getUnrollingPreferences(L, UP); 643 } 644 bool isLegalAddImmediate(int64_t Imm) override { 645 return Impl.isLegalAddImmediate(Imm); 646 } 647 bool isLegalICmpImmediate(int64_t Imm) override { 648 return Impl.isLegalICmpImmediate(Imm); 649 } 650 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 651 bool HasBaseReg, int64_t Scale) override { 652 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, 653 Scale); 654 } 655 bool isLegalMaskedStore(Type *DataType, int Consecutive) override { 656 return Impl.isLegalMaskedStore(DataType, Consecutive); 657 } 658 bool isLegalMaskedLoad(Type *DataType, int Consecutive) override { 659 return Impl.isLegalMaskedLoad(DataType, Consecutive); 660 } 661 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 662 bool HasBaseReg, int64_t Scale) override { 663 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale); 664 } 665 bool isTruncateFree(Type *Ty1, Type *Ty2) override { 666 return Impl.isTruncateFree(Ty1, Ty2); 667 } 668 bool isProfitableToHoist(Instruction *I) override { 669 return Impl.isProfitableToHoist(I); 670 } 671 bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } 672 unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); } 673 unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); } 674 bool shouldBuildLookupTables() override { 675 return Impl.shouldBuildLookupTables(); 676 } 677 bool enableAggressiveInterleaving(bool LoopHasReductions) override { 678 return Impl.enableAggressiveInterleaving(LoopHasReductions); 679 } 680 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override { 681 return Impl.getPopcntSupport(IntTyWidthInBit); 682 } 683 bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); } 684 685 unsigned getFPOpCost(Type *Ty) override { 686 return Impl.getFPOpCost(Ty); 687 } 688 689 unsigned getIntImmCost(const APInt &Imm, Type *Ty) override { 690 return Impl.getIntImmCost(Imm, Ty); 691 } 692 unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, 693 Type *Ty) override { 694 return Impl.getIntImmCost(Opc, Idx, Imm, Ty); 695 } 696 unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, 697 Type *Ty) override { 698 return Impl.getIntImmCost(IID, Idx, Imm, Ty); 699 } 700 unsigned getNumberOfRegisters(bool Vector) override { 701 return Impl.getNumberOfRegisters(Vector); 702 } 703 unsigned getRegisterBitWidth(bool Vector) override { 704 return Impl.getRegisterBitWidth(Vector); 705 } 706 unsigned getMaxInterleaveFactor() override { 707 return Impl.getMaxInterleaveFactor(); 708 } 709 unsigned 710 getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, 711 OperandValueKind Opd2Info, 712 OperandValueProperties Opd1PropInfo, 713 OperandValueProperties Opd2PropInfo) override { 714 return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, 715 Opd1PropInfo, Opd2PropInfo); 716 } 717 unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, 718 Type *SubTp) override { 719 return Impl.getShuffleCost(Kind, Tp, Index, SubTp); 720 } 721 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override { 722 return Impl.getCastInstrCost(Opcode, Dst, Src); 723 } 724 unsigned getCFInstrCost(unsigned Opcode) override { 725 return Impl.getCFInstrCost(Opcode); 726 } 727 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 728 Type *CondTy) override { 729 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy); 730 } 731 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, 732 unsigned Index) override { 733 return Impl.getVectorInstrCost(Opcode, Val, Index); 734 } 735 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 736 unsigned AddressSpace) override { 737 return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); 738 } 739 unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 740 unsigned AddressSpace) override { 741 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); 742 } 743 unsigned getReductionCost(unsigned Opcode, Type *Ty, 744 bool IsPairwiseForm) override { 745 return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm); 746 } 747 unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, 748 ArrayRef<Type *> Tys) override { 749 return Impl.getIntrinsicInstrCost(ID, RetTy, Tys); 750 } 751 unsigned getCallInstrCost(Function *F, Type *RetTy, 752 ArrayRef<Type *> Tys) override { 753 return Impl.getCallInstrCost(F, RetTy, Tys); 754 } 755 unsigned getNumberOfParts(Type *Tp) override { 756 return Impl.getNumberOfParts(Tp); 757 } 758 unsigned getAddressComputationCost(Type *Ty, bool IsComplex) override { 759 return Impl.getAddressComputationCost(Ty, IsComplex); 760 } 761 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override { 762 return Impl.getCostOfKeepingLiveOverCall(Tys); 763 } 764 bool getTgtMemIntrinsic(IntrinsicInst *Inst, 765 MemIntrinsicInfo &Info) override { 766 return Impl.getTgtMemIntrinsic(Inst, Info); 767 } 768 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, 769 Type *ExpectedType) override { 770 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); 771 } 772 }; 773 774 template <typename T> 775 TargetTransformInfo::TargetTransformInfo(T Impl) 776 : TTIImpl(new Model<T>(Impl)) {} 777 778 /// \brief Analysis pass providing the \c TargetTransformInfo. 779 /// 780 /// The core idea of the TargetIRAnalysis is to expose an interface through 781 /// which LLVM targets can analyze and provide information about the middle 782 /// end's target-independent IR. This supports use cases such as target-aware 783 /// cost modeling of IR constructs. 784 /// 785 /// This is a function analysis because much of the cost modeling for targets 786 /// is done in a subtarget specific way and LLVM supports compiling different 787 /// functions targeting different subtargets in order to support runtime 788 /// dispatch according to the observed subtarget. 789 class TargetIRAnalysis { 790 public: 791 typedef TargetTransformInfo Result; 792 793 /// \brief Opaque, unique identifier for this analysis pass. 794 static void *ID() { return (void *)&PassID; } 795 796 /// \brief Provide access to a name for this pass for debugging purposes. 797 static StringRef name() { return "TargetIRAnalysis"; } 798 799 /// \brief Default construct a target IR analysis. 800 /// 801 /// This will use the module's datalayout to construct a baseline 802 /// conservative TTI result. 803 TargetIRAnalysis(); 804 805 /// \brief Construct an IR analysis pass around a target-provide callback. 806 /// 807 /// The callback will be called with a particular function for which the TTI 808 /// is needed and must return a TTI object for that function. 809 TargetIRAnalysis(std::function<Result(Function &)> TTICallback); 810 811 // Value semantics. We spell out the constructors for MSVC. 812 TargetIRAnalysis(const TargetIRAnalysis &Arg) 813 : TTICallback(Arg.TTICallback) {} 814 TargetIRAnalysis(TargetIRAnalysis &&Arg) 815 : TTICallback(std::move(Arg.TTICallback)) {} 816 TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) { 817 TTICallback = RHS.TTICallback; 818 return *this; 819 } 820 TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) { 821 TTICallback = std::move(RHS.TTICallback); 822 return *this; 823 } 824 825 Result run(Function &F); 826 827 private: 828 static char PassID; 829 830 /// \brief The callback used to produce a result. 831 /// 832 /// We use a completely opaque callback so that targets can provide whatever 833 /// mechanism they desire for constructing the TTI for a given function. 834 /// 835 /// FIXME: Should we really use std::function? It's relatively inefficient. 836 /// It might be possible to arrange for even stateful callbacks to outlive 837 /// the analysis and thus use a function_ref which would be lighter weight. 838 /// This may also be less error prone as the callback is likely to reference 839 /// the external TargetMachine, and that reference needs to never dangle. 840 std::function<Result(Function &)> TTICallback; 841 842 /// \brief Helper function used as the callback in the default constructor. 843 static Result getDefaultTTI(Function &F); 844 }; 845 846 /// \brief Wrapper pass for TargetTransformInfo. 847 /// 848 /// This pass can be constructed from a TTI object which it stores internally 849 /// and is queried by passes. 850 class TargetTransformInfoWrapperPass : public ImmutablePass { 851 TargetIRAnalysis TIRA; 852 Optional<TargetTransformInfo> TTI; 853 854 virtual void anchor(); 855 856 public: 857 static char ID; 858 859 /// \brief We must provide a default constructor for the pass but it should 860 /// never be used. 861 /// 862 /// Use the constructor below or call one of the creation routines. 863 TargetTransformInfoWrapperPass(); 864 865 explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); 866 867 TargetTransformInfo &getTTI(Function &F); 868 }; 869 870 /// \brief Create an analysis pass wrapper around a TTI object. 871 /// 872 /// This analysis pass just holds the TTI instance and makes it available to 873 /// clients. 874 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); 875 876 } // End llvm namespace 877 878 #endif 879