1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// This pass exposes codegen information to IR-level passes. Every 11 /// transformation that uses codegen information is broken into three parts: 12 /// 1. The IR-level analysis pass. 13 /// 2. The IR-level transformation interface which provides the needed 14 /// information. 15 /// 3. Codegen-level implementation which uses target-specific hooks. 16 /// 17 /// This file defines #2, which is the interface that IR-level transformations 18 /// use for querying the codegen. 19 /// 20 //===----------------------------------------------------------------------===// 21 22 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H 23 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H 24 25 #include "llvm/ADT/Optional.h" 26 #include "llvm/IR/IntrinsicInst.h" 27 #include "llvm/IR/Intrinsics.h" 28 #include "llvm/IR/Operator.h" 29 #include "llvm/IR/PassManager.h" 30 #include "llvm/Pass.h" 31 #include "llvm/Support/DataTypes.h" 32 #include <functional> 33 34 namespace llvm { 35 36 class Function; 37 class GlobalValue; 38 class Loop; 39 class Type; 40 class User; 41 class Value; 42 43 /// \brief Information about a load/store intrinsic defined by the target. 44 struct MemIntrinsicInfo { 45 MemIntrinsicInfo() 46 : ReadMem(false), WriteMem(false), IsSimple(false), MatchingId(0), 47 NumMemRefs(0), PtrVal(nullptr) {} 48 bool ReadMem; 49 bool WriteMem; 50 /// True only if this memory operation is non-volatile, non-atomic, and 51 /// unordered. (See LoadInst/StoreInst for details on each) 52 bool IsSimple; 53 // Same Id is set by the target for corresponding load/store intrinsics. 54 unsigned short MatchingId; 55 int NumMemRefs; 56 Value *PtrVal; 57 }; 58 59 /// \brief This pass provides access to the codegen interfaces that are needed 60 /// for IR-level transformations. 61 class TargetTransformInfo { 62 public: 63 /// \brief Construct a TTI object using a type implementing the \c Concept 64 /// API below. 65 /// 66 /// This is used by targets to construct a TTI wrapping their target-specific 67 /// implementaion that encodes appropriate costs for their target. 68 template <typename T> TargetTransformInfo(T Impl); 69 70 /// \brief Construct a baseline TTI object using a minimal implementation of 71 /// the \c Concept API below. 72 /// 73 /// The TTI implementation will reflect the information in the DataLayout 74 /// provided if non-null. 75 explicit TargetTransformInfo(const DataLayout &DL); 76 77 // Provide move semantics. 78 TargetTransformInfo(TargetTransformInfo &&Arg); 79 TargetTransformInfo &operator=(TargetTransformInfo &&RHS); 80 81 // We need to define the destructor out-of-line to define our sub-classes 82 // out-of-line. 83 ~TargetTransformInfo(); 84 85 /// \brief Handle the invalidation of this information. 86 /// 87 /// When used as a result of \c TargetIRAnalysis this method will be called 88 /// when the function this was computed for changes. When it returns false, 89 /// the information is preserved across those changes. 90 bool invalidate(Function &, const PreservedAnalyses &) { 91 // FIXME: We should probably in some way ensure that the subtarget 92 // information for a function hasn't changed. 93 return false; 94 } 95 96 /// \name Generic Target Information 97 /// @{ 98 99 /// \brief Underlying constants for 'cost' values in this interface. 100 /// 101 /// Many APIs in this interface return a cost. This enum defines the 102 /// fundamental values that should be used to interpret (and produce) those 103 /// costs. The costs are returned as an int rather than a member of this 104 /// enumeration because it is expected that the cost of one IR instruction 105 /// may have a multiplicative factor to it or otherwise won't fit directly 106 /// into the enum. Moreover, it is common to sum or average costs which works 107 /// better as simple integral values. Thus this enum only provides constants. 108 /// Also note that the returned costs are signed integers to make it natural 109 /// to add, subtract, and test with zero (a common boundary condition). It is 110 /// not expected that 2^32 is a realistic cost to be modeling at any point. 111 /// 112 /// Note that these costs should usually reflect the intersection of code-size 113 /// cost and execution cost. A free instruction is typically one that folds 114 /// into another instruction. For example, reg-to-reg moves can often be 115 /// skipped by renaming the registers in the CPU, but they still are encoded 116 /// and thus wouldn't be considered 'free' here. 117 enum TargetCostConstants { 118 TCC_Free = 0, ///< Expected to fold away in lowering. 119 TCC_Basic = 1, ///< The cost of a typical 'add' instruction. 120 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. 121 }; 122 123 /// \brief Estimate the cost of a specific operation when lowered. 124 /// 125 /// Note that this is designed to work on an arbitrary synthetic opcode, and 126 /// thus work for hypothetical queries before an instruction has even been 127 /// formed. However, this does *not* work for GEPs, and must not be called 128 /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as 129 /// analyzing a GEP's cost required more information. 130 /// 131 /// Typically only the result type is required, and the operand type can be 132 /// omitted. However, if the opcode is one of the cast instructions, the 133 /// operand type is required. 134 /// 135 /// The returned cost is defined in terms of \c TargetCostConstants, see its 136 /// comments for a detailed explanation of the cost values. 137 int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const; 138 139 /// \brief Estimate the cost of a GEP operation when lowered. 140 /// 141 /// The contract for this function is the same as \c getOperationCost except 142 /// that it supports an interface that provides extra information specific to 143 /// the GEP operation. 144 int getGEPCost(Type *PointeeType, const Value *Ptr, 145 ArrayRef<const Value *> Operands) const; 146 147 /// \brief Estimate the cost of a function call when lowered. 148 /// 149 /// The contract for this is the same as \c getOperationCost except that it 150 /// supports an interface that provides extra information specific to call 151 /// instructions. 152 /// 153 /// This is the most basic query for estimating call cost: it only knows the 154 /// function type and (potentially) the number of arguments at the call site. 155 /// The latter is only interesting for varargs function types. 156 int getCallCost(FunctionType *FTy, int NumArgs = -1) const; 157 158 /// \brief Estimate the cost of calling a specific function when lowered. 159 /// 160 /// This overload adds the ability to reason about the particular function 161 /// being called in the event it is a library call with special lowering. 162 int getCallCost(const Function *F, int NumArgs = -1) const; 163 164 /// \brief Estimate the cost of calling a specific function when lowered. 165 /// 166 /// This overload allows specifying a set of candidate argument values. 167 int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const; 168 169 /// \returns A value by which our inlining threshold should be multiplied. 170 /// This is primarily used to bump up the inlining threshold wholesale on 171 /// targets where calls are unusually expensive. 172 /// 173 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of 174 /// individual classes of instructions would be better. 175 unsigned getInliningThresholdMultiplier() const; 176 177 /// \brief Estimate the cost of an intrinsic when lowered. 178 /// 179 /// Mirrors the \c getCallCost method but uses an intrinsic identifier. 180 int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 181 ArrayRef<Type *> ParamTys) const; 182 183 /// \brief Estimate the cost of an intrinsic when lowered. 184 /// 185 /// Mirrors the \c getCallCost method but uses an intrinsic identifier. 186 int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 187 ArrayRef<const Value *> Arguments) const; 188 189 /// \brief Estimate the cost of a given IR user when lowered. 190 /// 191 /// This can estimate the cost of either a ConstantExpr or Instruction when 192 /// lowered. It has two primary advantages over the \c getOperationCost and 193 /// \c getGEPCost above, and one significant disadvantage: it can only be 194 /// used when the IR construct has already been formed. 195 /// 196 /// The advantages are that it can inspect the SSA use graph to reason more 197 /// accurately about the cost. For example, all-constant-GEPs can often be 198 /// folded into a load or other instruction, but if they are used in some 199 /// other context they may not be folded. This routine can distinguish such 200 /// cases. 201 /// 202 /// The returned cost is defined in terms of \c TargetCostConstants, see its 203 /// comments for a detailed explanation of the cost values. 204 int getUserCost(const User *U) const; 205 206 /// \brief Return true if branch divergence exists. 207 /// 208 /// Branch divergence has a significantly negative impact on GPU performance 209 /// when threads in the same wavefront take different paths due to conditional 210 /// branches. 211 bool hasBranchDivergence() const; 212 213 /// \brief Returns whether V is a source of divergence. 214 /// 215 /// This function provides the target-dependent information for 216 /// the target-independent DivergenceAnalysis. DivergenceAnalysis first 217 /// builds the dependency graph, and then runs the reachability algorithm 218 /// starting with the sources of divergence. 219 bool isSourceOfDivergence(const Value *V) const; 220 221 /// \brief Test whether calls to a function lower to actual program function 222 /// calls. 223 /// 224 /// The idea is to test whether the program is likely to require a 'call' 225 /// instruction or equivalent in order to call the given function. 226 /// 227 /// FIXME: It's not clear that this is a good or useful query API. Client's 228 /// should probably move to simpler cost metrics using the above. 229 /// Alternatively, we could split the cost interface into distinct code-size 230 /// and execution-speed costs. This would allow modelling the core of this 231 /// query more accurately as a call is a single small instruction, but 232 /// incurs significant execution cost. 233 bool isLoweredToCall(const Function *F) const; 234 235 /// Parameters that control the generic loop unrolling transformation. 236 struct UnrollingPreferences { 237 /// The cost threshold for the unrolled loop. Should be relative to the 238 /// getUserCost values returned by this API, and the expectation is that 239 /// the unrolled loop's instructions when run through that interface should 240 /// not exceed this cost. However, this is only an estimate. Also, specific 241 /// loops may be unrolled even with a cost above this threshold if deemed 242 /// profitable. Set this to UINT_MAX to disable the loop body cost 243 /// restriction. 244 unsigned Threshold; 245 /// If complete unrolling will reduce the cost of the loop below its 246 /// expected dynamic cost while rolled by this percentage, apply a discount 247 /// (below) to its unrolled cost. 248 unsigned PercentDynamicCostSavedThreshold; 249 /// The discount applied to the unrolled cost when the *dynamic* cost 250 /// savings of unrolling exceed the \c PercentDynamicCostSavedThreshold. 251 unsigned DynamicCostSavingsDiscount; 252 /// The cost threshold for the unrolled loop when optimizing for size (set 253 /// to UINT_MAX to disable). 254 unsigned OptSizeThreshold; 255 /// The cost threshold for the unrolled loop, like Threshold, but used 256 /// for partial/runtime unrolling (set to UINT_MAX to disable). 257 unsigned PartialThreshold; 258 /// The cost threshold for the unrolled loop when optimizing for size, like 259 /// OptSizeThreshold, but used for partial/runtime unrolling (set to 260 /// UINT_MAX to disable). 261 unsigned PartialOptSizeThreshold; 262 /// A forced unrolling factor (the number of concatenated bodies of the 263 /// original loop in the unrolled loop body). When set to 0, the unrolling 264 /// transformation will select an unrolling factor based on the current cost 265 /// threshold and other factors. 266 unsigned Count; 267 // Set the maximum unrolling factor. The unrolling factor may be selected 268 // using the appropriate cost threshold, but may not exceed this number 269 // (set to UINT_MAX to disable). This does not apply in cases where the 270 // loop is being fully unrolled. 271 unsigned MaxCount; 272 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but 273 /// applies even if full unrolling is selected. This allows a target to fall 274 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount. 275 unsigned FullUnrollMaxCount; 276 /// Allow partial unrolling (unrolling of loops to expand the size of the 277 /// loop body, not only to eliminate small constant-trip-count loops). 278 bool Partial; 279 /// Allow runtime unrolling (unrolling of loops to expand the size of the 280 /// loop body even when the number of loop iterations is not known at 281 /// compile time). 282 bool Runtime; 283 /// Allow generation of a loop remainder (extra iterations after unroll). 284 bool AllowRemainder; 285 /// Allow emitting expensive instructions (such as divisions) when computing 286 /// the trip count of a loop for runtime unrolling. 287 bool AllowExpensiveTripCount; 288 /// Apply loop unroll on any kind of loop 289 /// (mainly to loops that fail runtime unrolling). 290 bool Force; 291 }; 292 293 /// \brief Get target-customized preferences for the generic loop unrolling 294 /// transformation. The caller will initialize UP with the current 295 /// target-independent defaults. 296 void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; 297 298 /// @} 299 300 /// \name Scalar Target Information 301 /// @{ 302 303 /// \brief Flags indicating the kind of support for population count. 304 /// 305 /// Compared to the SW implementation, HW support is supposed to 306 /// significantly boost the performance when the population is dense, and it 307 /// may or may not degrade performance if the population is sparse. A HW 308 /// support is considered as "Fast" if it can outperform, or is on a par 309 /// with, SW implementation when the population is sparse; otherwise, it is 310 /// considered as "Slow". 311 enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware }; 312 313 /// \brief Return true if the specified immediate is legal add immediate, that 314 /// is the target has add instructions which can add a register with the 315 /// immediate without having to materialize the immediate into a register. 316 bool isLegalAddImmediate(int64_t Imm) const; 317 318 /// \brief Return true if the specified immediate is legal icmp immediate, 319 /// that is the target has icmp instructions which can compare a register 320 /// against the immediate without having to materialize the immediate into a 321 /// register. 322 bool isLegalICmpImmediate(int64_t Imm) const; 323 324 /// \brief Return true if the addressing mode represented by AM is legal for 325 /// this target, for a load/store of the specified type. 326 /// The type may be VoidTy, in which case only return true if the addressing 327 /// mode is legal for a load/store of any legal type. 328 /// TODO: Handle pre/postinc as well. 329 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 330 bool HasBaseReg, int64_t Scale, 331 unsigned AddrSpace = 0) const; 332 333 /// \brief Return true if the target supports masked load/store 334 /// AVX2 and AVX-512 targets allow masks for consecutive load and store 335 bool isLegalMaskedStore(Type *DataType) const; 336 bool isLegalMaskedLoad(Type *DataType) const; 337 338 /// \brief Return true if the target supports masked gather/scatter 339 /// AVX-512 fully supports gather and scatter for vectors with 32 and 64 340 /// bits scalar type. 341 bool isLegalMaskedScatter(Type *DataType) const; 342 bool isLegalMaskedGather(Type *DataType) const; 343 344 /// \brief Return the cost of the scaling factor used in the addressing 345 /// mode represented by AM for this target, for a load/store 346 /// of the specified type. 347 /// If the AM is supported, the return value must be >= 0. 348 /// If the AM is not supported, it returns a negative value. 349 /// TODO: Handle pre/postinc as well. 350 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 351 bool HasBaseReg, int64_t Scale, 352 unsigned AddrSpace = 0) const; 353 354 /// \brief Return true if it's free to truncate a value of type Ty1 to type 355 /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 356 /// by referencing its sub-register AX. 357 bool isTruncateFree(Type *Ty1, Type *Ty2) const; 358 359 /// \brief Return true if it is profitable to hoist instruction in the 360 /// then/else to before if. 361 bool isProfitableToHoist(Instruction *I) const; 362 363 /// \brief Return true if this type is legal. 364 bool isTypeLegal(Type *Ty) const; 365 366 /// \brief Returns the target's jmp_buf alignment in bytes. 367 unsigned getJumpBufAlignment() const; 368 369 /// \brief Returns the target's jmp_buf size in bytes. 370 unsigned getJumpBufSize() const; 371 372 /// \brief Return true if switches should be turned into lookup tables for the 373 /// target. 374 bool shouldBuildLookupTables() const; 375 376 /// \brief Don't restrict interleaved unrolling to small loops. 377 bool enableAggressiveInterleaving(bool LoopHasReductions) const; 378 379 /// \brief Enable matching of interleaved access groups. 380 bool enableInterleavedAccessVectorization() const; 381 382 /// \brief Indicate that it is potentially unsafe to automatically vectorize 383 /// floating-point operations because the semantics of vector and scalar 384 /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math 385 /// does not support IEEE-754 denormal numbers, while depending on the 386 /// platform, scalar floating-point math does. 387 /// This applies to floating-point math operations and calls, not memory 388 /// operations, shuffles, or casts. 389 bool isFPVectorizationPotentiallyUnsafe() const; 390 391 /// \brief Determine if the target supports unaligned memory accesses. 392 bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace = 0, 393 unsigned Alignment = 1, 394 bool *Fast = nullptr) const; 395 396 /// \brief Return hardware support for population count. 397 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; 398 399 /// \brief Return true if the hardware has a fast square-root instruction. 400 bool haveFastSqrt(Type *Ty) const; 401 402 /// \brief Return the expected cost of supporting the floating point operation 403 /// of the specified type. 404 int getFPOpCost(Type *Ty) const; 405 406 /// \brief Return the expected cost of materializing for the given integer 407 /// immediate of the specified type. 408 int getIntImmCost(const APInt &Imm, Type *Ty) const; 409 410 /// \brief Return the expected cost of materialization for the given integer 411 /// immediate of the specified type for a given instruction. The cost can be 412 /// zero if the immediate can be folded into the specified instruction. 413 int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, 414 Type *Ty) const; 415 int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, 416 Type *Ty) const; 417 418 /// \brief Return the expected cost for the given integer when optimising 419 /// for size. This is different than the other integer immediate cost 420 /// functions in that it is subtarget agnostic. This is useful when you e.g. 421 /// target one ISA such as Aarch32 but smaller encodings could be possible 422 /// with another such as Thumb. This return value is used as a penalty when 423 /// the total costs for a constant is calculated (the bigger the cost, the 424 /// more beneficial constant hoisting is). 425 int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, 426 Type *Ty) const; 427 /// @} 428 429 /// \name Vector Target Information 430 /// @{ 431 432 /// \brief The various kinds of shuffle patterns for vector queries. 433 enum ShuffleKind { 434 SK_Broadcast, ///< Broadcast element 0 to all other elements. 435 SK_Reverse, ///< Reverse the order of the vector. 436 SK_Alternate, ///< Choose alternate elements from vector. 437 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. 438 SK_ExtractSubvector ///< ExtractSubvector Index indicates start offset. 439 }; 440 441 /// \brief Additional information about an operand's possible values. 442 enum OperandValueKind { 443 OK_AnyValue, // Operand can have any value. 444 OK_UniformValue, // Operand is uniform (splat of a value). 445 OK_UniformConstantValue, // Operand is uniform constant. 446 OK_NonUniformConstantValue // Operand is a non uniform constant value. 447 }; 448 449 /// \brief Additional properties of an operand's values. 450 enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 }; 451 452 /// \return The number of scalar or vector registers that the target has. 453 /// If 'Vectors' is true, it returns the number of vector registers. If it is 454 /// set to false, it returns the number of scalar registers. 455 unsigned getNumberOfRegisters(bool Vector) const; 456 457 /// \return The width of the largest scalar or vector register type. 458 unsigned getRegisterBitWidth(bool Vector) const; 459 460 /// \return The bitwidth of the largest vector type that should be used to 461 /// load/store in the given address space. 462 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; 463 464 /// \return The size of a cache line in bytes. 465 unsigned getCacheLineSize() const; 466 467 /// \return How much before a load we should place the prefetch instruction. 468 /// This is currently measured in number of instructions. 469 unsigned getPrefetchDistance() const; 470 471 /// \return Some HW prefetchers can handle accesses up to a certain constant 472 /// stride. This is the minimum stride in bytes where it makes sense to start 473 /// adding SW prefetches. The default is 1, i.e. prefetch with any stride. 474 unsigned getMinPrefetchStride() const; 475 476 /// \return The maximum number of iterations to prefetch ahead. If the 477 /// required number of iterations is more than this number, no prefetching is 478 /// performed. 479 unsigned getMaxPrefetchIterationsAhead() const; 480 481 /// \return The maximum interleave factor that any transform should try to 482 /// perform for this target. This number depends on the level of parallelism 483 /// and the number of execution units in the CPU. 484 unsigned getMaxInterleaveFactor(unsigned VF) const; 485 486 /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc. 487 int getArithmeticInstrCost( 488 unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue, 489 OperandValueKind Opd2Info = OK_AnyValue, 490 OperandValueProperties Opd1PropInfo = OP_None, 491 OperandValueProperties Opd2PropInfo = OP_None) const; 492 493 /// \return The cost of a shuffle instruction of kind Kind and of type Tp. 494 /// The index and subtype parameters are used by the subvector insertion and 495 /// extraction shuffle kinds. 496 int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, 497 Type *SubTp = nullptr) const; 498 499 /// \return The expected cost of cast instructions, such as bitcast, trunc, 500 /// zext, etc. 501 int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const; 502 503 /// \return The expected cost of a sign- or zero-extended vector extract. Use 504 /// -1 to indicate that there is no information about the index value. 505 int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, 506 unsigned Index = -1) const; 507 508 /// \return The expected cost of control-flow related instructions such as 509 /// Phi, Ret, Br. 510 int getCFInstrCost(unsigned Opcode) const; 511 512 /// \returns The expected cost of compare and select instructions. 513 int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 514 Type *CondTy = nullptr) const; 515 516 /// \return The expected cost of vector Insert and Extract. 517 /// Use -1 to indicate that there is no information on the index value. 518 int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; 519 520 /// \return The cost of Load and Store instructions. 521 int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 522 unsigned AddressSpace) const; 523 524 /// \return The cost of masked Load and Store instructions. 525 int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 526 unsigned AddressSpace) const; 527 528 /// \return The cost of Gather or Scatter operation 529 /// \p Opcode - is a type of memory access Load or Store 530 /// \p DataTy - a vector type of the data to be loaded or stored 531 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory 532 /// \p VariableMask - true when the memory access is predicated with a mask 533 /// that is not a compile-time constant 534 /// \p Alignment - alignment of single element 535 int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, 536 bool VariableMask, unsigned Alignment) const; 537 538 /// \return The cost of the interleaved memory operation. 539 /// \p Opcode is the memory operation code 540 /// \p VecTy is the vector type of the interleaved access. 541 /// \p Factor is the interleave factor 542 /// \p Indices is the indices for interleaved load members (as interleaved 543 /// load allows gaps) 544 /// \p Alignment is the alignment of the memory operation 545 /// \p AddressSpace is address space of the pointer. 546 int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, 547 ArrayRef<unsigned> Indices, unsigned Alignment, 548 unsigned AddressSpace) const; 549 550 /// \brief Calculate the cost of performing a vector reduction. 551 /// 552 /// This is the cost of reducing the vector value of type \p Ty to a scalar 553 /// value using the operation denoted by \p Opcode. The form of the reduction 554 /// can either be a pairwise reduction or a reduction that splits the vector 555 /// at every reduction level. 556 /// 557 /// Pairwise: 558 /// (v0, v1, v2, v3) 559 /// ((v0+v1), (v2, v3), undef, undef) 560 /// Split: 561 /// (v0, v1, v2, v3) 562 /// ((v0+v2), (v1+v3), undef, undef) 563 int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const; 564 565 /// \returns The cost of Intrinsic instructions. Types analysis only. 566 int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, 567 ArrayRef<Type *> Tys, FastMathFlags FMF) const; 568 569 /// \returns The cost of Intrinsic instructions. Analyses the real arguments. 570 int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, 571 ArrayRef<Value *> Args, FastMathFlags FMF) const; 572 573 /// \returns The cost of Call instructions. 574 int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const; 575 576 /// \returns The number of pieces into which the provided type must be 577 /// split during legalization. Zero is returned when the answer is unknown. 578 unsigned getNumberOfParts(Type *Tp) const; 579 580 /// \returns The cost of the address computation. For most targets this can be 581 /// merged into the instruction indexing mode. Some targets might want to 582 /// distinguish between address computation for memory operations on vector 583 /// types and scalar types. Such targets should override this function. 584 /// The 'IsComplex' parameter is a hint that the address computation is likely 585 /// to involve multiple instructions and as such unlikely to be merged into 586 /// the address indexing mode. 587 int getAddressComputationCost(Type *Ty, bool IsComplex = false) const; 588 589 /// \returns The cost, if any, of keeping values of the given types alive 590 /// over a callsite. 591 /// 592 /// Some types may require the use of register classes that do not have 593 /// any callee-saved registers, so would require a spill and fill. 594 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; 595 596 /// \returns True if the intrinsic is a supported memory intrinsic. Info 597 /// will contain additional information - whether the intrinsic may write 598 /// or read to memory, volatility and the pointer. Info is undefined 599 /// if false is returned. 600 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; 601 602 /// \returns A value which is the result of the given memory intrinsic. New 603 /// instructions may be created to extract the result from the given intrinsic 604 /// memory operation. Returns nullptr if the target cannot create a result 605 /// from the given intrinsic. 606 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, 607 Type *ExpectedType) const; 608 609 /// \returns True if the two functions have compatible attributes for inlining 610 /// purposes. 611 bool areInlineCompatible(const Function *Caller, 612 const Function *Callee) const; 613 614 /// @} 615 616 private: 617 /// \brief The abstract base class used to type erase specific TTI 618 /// implementations. 619 class Concept; 620 621 /// \brief The template model for the base class which wraps a concrete 622 /// implementation in a type erased interface. 623 template <typename T> class Model; 624 625 std::unique_ptr<Concept> TTIImpl; 626 }; 627 628 class TargetTransformInfo::Concept { 629 public: 630 virtual ~Concept() = 0; 631 virtual const DataLayout &getDataLayout() const = 0; 632 virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0; 633 virtual int getGEPCost(Type *PointeeType, const Value *Ptr, 634 ArrayRef<const Value *> Operands) = 0; 635 virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0; 636 virtual int getCallCost(const Function *F, int NumArgs) = 0; 637 virtual int getCallCost(const Function *F, 638 ArrayRef<const Value *> Arguments) = 0; 639 virtual unsigned getInliningThresholdMultiplier() = 0; 640 virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 641 ArrayRef<Type *> ParamTys) = 0; 642 virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 643 ArrayRef<const Value *> Arguments) = 0; 644 virtual int getUserCost(const User *U) = 0; 645 virtual bool hasBranchDivergence() = 0; 646 virtual bool isSourceOfDivergence(const Value *V) = 0; 647 virtual bool isLoweredToCall(const Function *F) = 0; 648 virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0; 649 virtual bool isLegalAddImmediate(int64_t Imm) = 0; 650 virtual bool isLegalICmpImmediate(int64_t Imm) = 0; 651 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, 652 int64_t BaseOffset, bool HasBaseReg, 653 int64_t Scale, 654 unsigned AddrSpace) = 0; 655 virtual bool isLegalMaskedStore(Type *DataType) = 0; 656 virtual bool isLegalMaskedLoad(Type *DataType) = 0; 657 virtual bool isLegalMaskedScatter(Type *DataType) = 0; 658 virtual bool isLegalMaskedGather(Type *DataType) = 0; 659 virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, 660 int64_t BaseOffset, bool HasBaseReg, 661 int64_t Scale, unsigned AddrSpace) = 0; 662 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0; 663 virtual bool isProfitableToHoist(Instruction *I) = 0; 664 virtual bool isTypeLegal(Type *Ty) = 0; 665 virtual unsigned getJumpBufAlignment() = 0; 666 virtual unsigned getJumpBufSize() = 0; 667 virtual bool shouldBuildLookupTables() = 0; 668 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; 669 virtual bool enableInterleavedAccessVectorization() = 0; 670 virtual bool isFPVectorizationPotentiallyUnsafe() = 0; 671 virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth, 672 unsigned AddressSpace, 673 unsigned Alignment, 674 bool *Fast) = 0; 675 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; 676 virtual bool haveFastSqrt(Type *Ty) = 0; 677 virtual int getFPOpCost(Type *Ty) = 0; 678 virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, 679 Type *Ty) = 0; 680 virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0; 681 virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, 682 Type *Ty) = 0; 683 virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, 684 Type *Ty) = 0; 685 virtual unsigned getNumberOfRegisters(bool Vector) = 0; 686 virtual unsigned getRegisterBitWidth(bool Vector) = 0; 687 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) = 0; 688 virtual unsigned getCacheLineSize() = 0; 689 virtual unsigned getPrefetchDistance() = 0; 690 virtual unsigned getMinPrefetchStride() = 0; 691 virtual unsigned getMaxPrefetchIterationsAhead() = 0; 692 virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; 693 virtual unsigned 694 getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, 695 OperandValueKind Opd2Info, 696 OperandValueProperties Opd1PropInfo, 697 OperandValueProperties Opd2PropInfo) = 0; 698 virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, 699 Type *SubTp) = 0; 700 virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0; 701 virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst, 702 VectorType *VecTy, unsigned Index) = 0; 703 virtual int getCFInstrCost(unsigned Opcode) = 0; 704 virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 705 Type *CondTy) = 0; 706 virtual int getVectorInstrCost(unsigned Opcode, Type *Val, 707 unsigned Index) = 0; 708 virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 709 unsigned AddressSpace) = 0; 710 virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, 711 unsigned Alignment, 712 unsigned AddressSpace) = 0; 713 virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, 714 Value *Ptr, bool VariableMask, 715 unsigned Alignment) = 0; 716 virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, 717 unsigned Factor, 718 ArrayRef<unsigned> Indices, 719 unsigned Alignment, 720 unsigned AddressSpace) = 0; 721 virtual int getReductionCost(unsigned Opcode, Type *Ty, 722 bool IsPairwiseForm) = 0; 723 virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, 724 ArrayRef<Type *> Tys, 725 FastMathFlags FMF) = 0; 726 virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, 727 ArrayRef<Value *> Args, 728 FastMathFlags FMF) = 0; 729 virtual int getCallInstrCost(Function *F, Type *RetTy, 730 ArrayRef<Type *> Tys) = 0; 731 virtual unsigned getNumberOfParts(Type *Tp) = 0; 732 virtual int getAddressComputationCost(Type *Ty, bool IsComplex) = 0; 733 virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0; 734 virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, 735 MemIntrinsicInfo &Info) = 0; 736 virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, 737 Type *ExpectedType) = 0; 738 virtual bool areInlineCompatible(const Function *Caller, 739 const Function *Callee) const = 0; 740 }; 741 742 template <typename T> 743 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { 744 T Impl; 745 746 public: 747 Model(T Impl) : Impl(std::move(Impl)) {} 748 ~Model() override {} 749 750 const DataLayout &getDataLayout() const override { 751 return Impl.getDataLayout(); 752 } 753 754 int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override { 755 return Impl.getOperationCost(Opcode, Ty, OpTy); 756 } 757 int getGEPCost(Type *PointeeType, const Value *Ptr, 758 ArrayRef<const Value *> Operands) override { 759 return Impl.getGEPCost(PointeeType, Ptr, Operands); 760 } 761 int getCallCost(FunctionType *FTy, int NumArgs) override { 762 return Impl.getCallCost(FTy, NumArgs); 763 } 764 int getCallCost(const Function *F, int NumArgs) override { 765 return Impl.getCallCost(F, NumArgs); 766 } 767 int getCallCost(const Function *F, 768 ArrayRef<const Value *> Arguments) override { 769 return Impl.getCallCost(F, Arguments); 770 } 771 unsigned getInliningThresholdMultiplier() override { 772 return Impl.getInliningThresholdMultiplier(); 773 } 774 int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 775 ArrayRef<Type *> ParamTys) override { 776 return Impl.getIntrinsicCost(IID, RetTy, ParamTys); 777 } 778 int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 779 ArrayRef<const Value *> Arguments) override { 780 return Impl.getIntrinsicCost(IID, RetTy, Arguments); 781 } 782 int getUserCost(const User *U) override { return Impl.getUserCost(U); } 783 bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } 784 bool isSourceOfDivergence(const Value *V) override { 785 return Impl.isSourceOfDivergence(V); 786 } 787 bool isLoweredToCall(const Function *F) override { 788 return Impl.isLoweredToCall(F); 789 } 790 void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override { 791 return Impl.getUnrollingPreferences(L, UP); 792 } 793 bool isLegalAddImmediate(int64_t Imm) override { 794 return Impl.isLegalAddImmediate(Imm); 795 } 796 bool isLegalICmpImmediate(int64_t Imm) override { 797 return Impl.isLegalICmpImmediate(Imm); 798 } 799 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 800 bool HasBaseReg, int64_t Scale, 801 unsigned AddrSpace) override { 802 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, 803 Scale, AddrSpace); 804 } 805 bool isLegalMaskedStore(Type *DataType) override { 806 return Impl.isLegalMaskedStore(DataType); 807 } 808 bool isLegalMaskedLoad(Type *DataType) override { 809 return Impl.isLegalMaskedLoad(DataType); 810 } 811 bool isLegalMaskedScatter(Type *DataType) override { 812 return Impl.isLegalMaskedScatter(DataType); 813 } 814 bool isLegalMaskedGather(Type *DataType) override { 815 return Impl.isLegalMaskedGather(DataType); 816 } 817 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 818 bool HasBaseReg, int64_t Scale, 819 unsigned AddrSpace) override { 820 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, 821 Scale, AddrSpace); 822 } 823 bool isTruncateFree(Type *Ty1, Type *Ty2) override { 824 return Impl.isTruncateFree(Ty1, Ty2); 825 } 826 bool isProfitableToHoist(Instruction *I) override { 827 return Impl.isProfitableToHoist(I); 828 } 829 bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } 830 unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); } 831 unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); } 832 bool shouldBuildLookupTables() override { 833 return Impl.shouldBuildLookupTables(); 834 } 835 bool enableAggressiveInterleaving(bool LoopHasReductions) override { 836 return Impl.enableAggressiveInterleaving(LoopHasReductions); 837 } 838 bool enableInterleavedAccessVectorization() override { 839 return Impl.enableInterleavedAccessVectorization(); 840 } 841 bool isFPVectorizationPotentiallyUnsafe() override { 842 return Impl.isFPVectorizationPotentiallyUnsafe(); 843 } 844 bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace, 845 unsigned Alignment, bool *Fast) override { 846 return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace, 847 Alignment, Fast); 848 } 849 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override { 850 return Impl.getPopcntSupport(IntTyWidthInBit); 851 } 852 bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); } 853 854 int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); } 855 856 int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, 857 Type *Ty) override { 858 return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty); 859 } 860 int getIntImmCost(const APInt &Imm, Type *Ty) override { 861 return Impl.getIntImmCost(Imm, Ty); 862 } 863 int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, 864 Type *Ty) override { 865 return Impl.getIntImmCost(Opc, Idx, Imm, Ty); 866 } 867 int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, 868 Type *Ty) override { 869 return Impl.getIntImmCost(IID, Idx, Imm, Ty); 870 } 871 unsigned getNumberOfRegisters(bool Vector) override { 872 return Impl.getNumberOfRegisters(Vector); 873 } 874 unsigned getRegisterBitWidth(bool Vector) override { 875 return Impl.getRegisterBitWidth(Vector); 876 } 877 878 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) override { 879 return Impl.getLoadStoreVecRegBitWidth(AddrSpace); 880 } 881 882 unsigned getCacheLineSize() override { 883 return Impl.getCacheLineSize(); 884 } 885 unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); } 886 unsigned getMinPrefetchStride() override { 887 return Impl.getMinPrefetchStride(); 888 } 889 unsigned getMaxPrefetchIterationsAhead() override { 890 return Impl.getMaxPrefetchIterationsAhead(); 891 } 892 unsigned getMaxInterleaveFactor(unsigned VF) override { 893 return Impl.getMaxInterleaveFactor(VF); 894 } 895 unsigned 896 getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, 897 OperandValueKind Opd2Info, 898 OperandValueProperties Opd1PropInfo, 899 OperandValueProperties Opd2PropInfo) override { 900 return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, 901 Opd1PropInfo, Opd2PropInfo); 902 } 903 int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, 904 Type *SubTp) override { 905 return Impl.getShuffleCost(Kind, Tp, Index, SubTp); 906 } 907 int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override { 908 return Impl.getCastInstrCost(Opcode, Dst, Src); 909 } 910 int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, 911 unsigned Index) override { 912 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index); 913 } 914 int getCFInstrCost(unsigned Opcode) override { 915 return Impl.getCFInstrCost(Opcode); 916 } 917 int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) override { 918 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy); 919 } 920 int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override { 921 return Impl.getVectorInstrCost(Opcode, Val, Index); 922 } 923 int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 924 unsigned AddressSpace) override { 925 return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); 926 } 927 int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 928 unsigned AddressSpace) override { 929 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); 930 } 931 int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, 932 Value *Ptr, bool VariableMask, 933 unsigned Alignment) override { 934 return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, 935 Alignment); 936 } 937 int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, 938 ArrayRef<unsigned> Indices, unsigned Alignment, 939 unsigned AddressSpace) override { 940 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, 941 Alignment, AddressSpace); 942 } 943 int getReductionCost(unsigned Opcode, Type *Ty, 944 bool IsPairwiseForm) override { 945 return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm); 946 } 947 int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, 948 FastMathFlags FMF) override { 949 return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF); 950 } 951 int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, 952 ArrayRef<Value *> Args, 953 FastMathFlags FMF) override { 954 return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF); 955 } 956 int getCallInstrCost(Function *F, Type *RetTy, 957 ArrayRef<Type *> Tys) override { 958 return Impl.getCallInstrCost(F, RetTy, Tys); 959 } 960 unsigned getNumberOfParts(Type *Tp) override { 961 return Impl.getNumberOfParts(Tp); 962 } 963 int getAddressComputationCost(Type *Ty, bool IsComplex) override { 964 return Impl.getAddressComputationCost(Ty, IsComplex); 965 } 966 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override { 967 return Impl.getCostOfKeepingLiveOverCall(Tys); 968 } 969 bool getTgtMemIntrinsic(IntrinsicInst *Inst, 970 MemIntrinsicInfo &Info) override { 971 return Impl.getTgtMemIntrinsic(Inst, Info); 972 } 973 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, 974 Type *ExpectedType) override { 975 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); 976 } 977 bool areInlineCompatible(const Function *Caller, 978 const Function *Callee) const override { 979 return Impl.areInlineCompatible(Caller, Callee); 980 } 981 }; 982 983 template <typename T> 984 TargetTransformInfo::TargetTransformInfo(T Impl) 985 : TTIImpl(new Model<T>(Impl)) {} 986 987 /// \brief Analysis pass providing the \c TargetTransformInfo. 988 /// 989 /// The core idea of the TargetIRAnalysis is to expose an interface through 990 /// which LLVM targets can analyze and provide information about the middle 991 /// end's target-independent IR. This supports use cases such as target-aware 992 /// cost modeling of IR constructs. 993 /// 994 /// This is a function analysis because much of the cost modeling for targets 995 /// is done in a subtarget specific way and LLVM supports compiling different 996 /// functions targeting different subtargets in order to support runtime 997 /// dispatch according to the observed subtarget. 998 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> { 999 public: 1000 typedef TargetTransformInfo Result; 1001 1002 /// \brief Default construct a target IR analysis. 1003 /// 1004 /// This will use the module's datalayout to construct a baseline 1005 /// conservative TTI result. 1006 TargetIRAnalysis(); 1007 1008 /// \brief Construct an IR analysis pass around a target-provide callback. 1009 /// 1010 /// The callback will be called with a particular function for which the TTI 1011 /// is needed and must return a TTI object for that function. 1012 TargetIRAnalysis(std::function<Result(const Function &)> TTICallback); 1013 1014 // Value semantics. We spell out the constructors for MSVC. 1015 TargetIRAnalysis(const TargetIRAnalysis &Arg) 1016 : TTICallback(Arg.TTICallback) {} 1017 TargetIRAnalysis(TargetIRAnalysis &&Arg) 1018 : TTICallback(std::move(Arg.TTICallback)) {} 1019 TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) { 1020 TTICallback = RHS.TTICallback; 1021 return *this; 1022 } 1023 TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) { 1024 TTICallback = std::move(RHS.TTICallback); 1025 return *this; 1026 } 1027 1028 Result run(const Function &F, AnalysisManager<Function> &); 1029 1030 private: 1031 friend AnalysisInfoMixin<TargetIRAnalysis>; 1032 static char PassID; 1033 1034 /// \brief The callback used to produce a result. 1035 /// 1036 /// We use a completely opaque callback so that targets can provide whatever 1037 /// mechanism they desire for constructing the TTI for a given function. 1038 /// 1039 /// FIXME: Should we really use std::function? It's relatively inefficient. 1040 /// It might be possible to arrange for even stateful callbacks to outlive 1041 /// the analysis and thus use a function_ref which would be lighter weight. 1042 /// This may also be less error prone as the callback is likely to reference 1043 /// the external TargetMachine, and that reference needs to never dangle. 1044 std::function<Result(const Function &)> TTICallback; 1045 1046 /// \brief Helper function used as the callback in the default constructor. 1047 static Result getDefaultTTI(const Function &F); 1048 }; 1049 1050 /// \brief Wrapper pass for TargetTransformInfo. 1051 /// 1052 /// This pass can be constructed from a TTI object which it stores internally 1053 /// and is queried by passes. 1054 class TargetTransformInfoWrapperPass : public ImmutablePass { 1055 TargetIRAnalysis TIRA; 1056 Optional<TargetTransformInfo> TTI; 1057 1058 virtual void anchor(); 1059 1060 public: 1061 static char ID; 1062 1063 /// \brief We must provide a default constructor for the pass but it should 1064 /// never be used. 1065 /// 1066 /// Use the constructor below or call one of the creation routines. 1067 TargetTransformInfoWrapperPass(); 1068 1069 explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); 1070 1071 TargetTransformInfo &getTTI(const Function &F); 1072 }; 1073 1074 /// \brief Create an analysis pass wrapper around a TTI object. 1075 /// 1076 /// This analysis pass just holds the TTI instance and makes it available to 1077 /// clients. 1078 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); 1079 1080 } // End llvm namespace 1081 1082 #endif 1083