1 //===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// This file provides helpers for the implementation of 11 /// a TargetTransformInfo-conforming class. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H 16 #define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H 17 18 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 19 #include "llvm/Analysis/TargetTransformInfo.h" 20 #include "llvm/Analysis/VectorUtils.h" 21 #include "llvm/IR/CallSite.h" 22 #include "llvm/IR/DataLayout.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/GetElementPtrTypeIterator.h" 25 #include "llvm/IR/Operator.h" 26 #include "llvm/IR/Type.h" 27 28 namespace llvm { 29 30 /// Base class for use as a mix-in that aids implementing 31 /// a TargetTransformInfo-compatible class. 32 class TargetTransformInfoImplBase { 33 protected: 34 typedef TargetTransformInfo TTI; 35 36 const DataLayout &DL; 37 38 explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {} 39 40 public: 41 // Provide value semantics. MSVC requires that we spell all of these out. 42 TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) 43 : DL(Arg.DL) {} 44 TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {} 45 46 const DataLayout &getDataLayout() const { return DL; } 47 48 unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { 49 switch (Opcode) { 50 default: 51 // By default, just classify everything as 'basic'. 52 return TTI::TCC_Basic; 53 54 case Instruction::GetElementPtr: 55 llvm_unreachable("Use getGEPCost for GEP operations!"); 56 57 case Instruction::BitCast: 58 assert(OpTy && "Cast instructions must provide the operand type"); 59 if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy())) 60 // Identity and pointer-to-pointer casts are free. 61 return TTI::TCC_Free; 62 63 // Otherwise, the default basic cost is used. 64 return TTI::TCC_Basic; 65 66 case Instruction::FDiv: 67 case Instruction::FRem: 68 case Instruction::SDiv: 69 case Instruction::SRem: 70 case Instruction::UDiv: 71 case Instruction::URem: 72 return TTI::TCC_Expensive; 73 74 case Instruction::IntToPtr: { 75 // An inttoptr cast is free so long as the input is a legal integer type 76 // which doesn't contain values outside the range of a pointer. 77 unsigned OpSize = OpTy->getScalarSizeInBits(); 78 if (DL.isLegalInteger(OpSize) && 79 OpSize <= DL.getPointerTypeSizeInBits(Ty)) 80 return TTI::TCC_Free; 81 82 // Otherwise it's not a no-op. 83 return TTI::TCC_Basic; 84 } 85 case Instruction::PtrToInt: { 86 // A ptrtoint cast is free so long as the result is large enough to store 87 // the pointer, and a legal integer type. 88 unsigned DestSize = Ty->getScalarSizeInBits(); 89 if (DL.isLegalInteger(DestSize) && 90 DestSize >= DL.getPointerTypeSizeInBits(OpTy)) 91 return TTI::TCC_Free; 92 93 // Otherwise it's not a no-op. 94 return TTI::TCC_Basic; 95 } 96 case Instruction::Trunc: 97 // trunc to a native type is free (assuming the target has compare and 98 // shift-right of the same width). 99 if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty))) 100 return TTI::TCC_Free; 101 102 return TTI::TCC_Basic; 103 } 104 } 105 106 int getGEPCost(Type *PointeeType, const Value *Ptr, 107 ArrayRef<const Value *> Operands) { 108 // In the basic model, we just assume that all-constant GEPs will be folded 109 // into their uses via addressing modes. 110 for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) 111 if (!isa<Constant>(Operands[Idx])) 112 return TTI::TCC_Basic; 113 114 return TTI::TCC_Free; 115 } 116 117 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, 118 unsigned &JTSize) { 119 JTSize = 0; 120 return SI.getNumCases(); 121 } 122 123 int getExtCost(const Instruction *I, const Value *Src) { 124 return TTI::TCC_Basic; 125 } 126 127 unsigned getCallCost(FunctionType *FTy, int NumArgs) { 128 assert(FTy && "FunctionType must be provided to this routine."); 129 130 // The target-independent implementation just measures the size of the 131 // function by approximating that each argument will take on average one 132 // instruction to prepare. 133 134 if (NumArgs < 0) 135 // Set the argument number to the number of explicit arguments in the 136 // function. 137 NumArgs = FTy->getNumParams(); 138 139 return TTI::TCC_Basic * (NumArgs + 1); 140 } 141 142 unsigned getInliningThresholdMultiplier() { return 1; } 143 144 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 145 ArrayRef<Type *> ParamTys) { 146 switch (IID) { 147 default: 148 // Intrinsics rarely (if ever) have normal argument setup constraints. 149 // Model them as having a basic instruction cost. 150 // FIXME: This is wrong for libc intrinsics. 151 return TTI::TCC_Basic; 152 153 case Intrinsic::annotation: 154 case Intrinsic::assume: 155 case Intrinsic::sideeffect: 156 case Intrinsic::dbg_declare: 157 case Intrinsic::dbg_value: 158 case Intrinsic::dbg_label: 159 case Intrinsic::invariant_start: 160 case Intrinsic::invariant_end: 161 case Intrinsic::lifetime_start: 162 case Intrinsic::lifetime_end: 163 case Intrinsic::objectsize: 164 case Intrinsic::ptr_annotation: 165 case Intrinsic::var_annotation: 166 case Intrinsic::experimental_gc_result: 167 case Intrinsic::experimental_gc_relocate: 168 case Intrinsic::coro_alloc: 169 case Intrinsic::coro_begin: 170 case Intrinsic::coro_free: 171 case Intrinsic::coro_end: 172 case Intrinsic::coro_frame: 173 case Intrinsic::coro_size: 174 case Intrinsic::coro_suspend: 175 case Intrinsic::coro_param: 176 case Intrinsic::coro_subfn_addr: 177 // These intrinsics don't actually represent code after lowering. 178 return TTI::TCC_Free; 179 } 180 } 181 182 bool hasBranchDivergence() { return false; } 183 184 bool isSourceOfDivergence(const Value *V) { return false; } 185 186 bool isAlwaysUniform(const Value *V) { return false; } 187 188 unsigned getFlatAddressSpace () { 189 return -1; 190 } 191 192 bool isLoweredToCall(const Function *F) { 193 assert(F && "A concrete function must be provided to this routine."); 194 195 // FIXME: These should almost certainly not be handled here, and instead 196 // handled with the help of TLI or the target itself. This was largely 197 // ported from existing analysis heuristics here so that such refactorings 198 // can take place in the future. 199 200 if (F->isIntrinsic()) 201 return false; 202 203 if (F->hasLocalLinkage() || !F->hasName()) 204 return true; 205 206 StringRef Name = F->getName(); 207 208 // These will all likely lower to a single selection DAG node. 209 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || 210 Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || 211 Name == "fmin" || Name == "fminf" || Name == "fminl" || 212 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" || 213 Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || 214 Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") 215 return false; 216 217 // These are all likely to be optimized into something smaller. 218 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || 219 Name == "exp2l" || Name == "exp2f" || Name == "floor" || 220 Name == "floorf" || Name == "ceil" || Name == "round" || 221 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" || 222 Name == "llabs") 223 return false; 224 225 return true; 226 } 227 228 void getUnrollingPreferences(Loop *, ScalarEvolution &, 229 TTI::UnrollingPreferences &) {} 230 231 bool isLegalAddImmediate(int64_t Imm) { return false; } 232 233 bool isLegalICmpImmediate(int64_t Imm) { return false; } 234 235 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 236 bool HasBaseReg, int64_t Scale, 237 unsigned AddrSpace, Instruction *I = nullptr) { 238 // Guess that only reg and reg+reg addressing is allowed. This heuristic is 239 // taken from the implementation of LSR. 240 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); 241 } 242 243 bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) { 244 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, 245 C1.ScaleCost, C1.ImmCost, C1.SetupCost) < 246 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, 247 C2.ScaleCost, C2.ImmCost, C2.SetupCost); 248 } 249 250 bool canMacroFuseCmp() { return false; } 251 252 bool shouldFavorPostInc() const { return false; } 253 254 bool isLegalMaskedStore(Type *DataType) { return false; } 255 256 bool isLegalMaskedLoad(Type *DataType) { return false; } 257 258 bool isLegalMaskedScatter(Type *DataType) { return false; } 259 260 bool isLegalMaskedGather(Type *DataType) { return false; } 261 262 bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; } 263 264 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; } 265 266 bool prefersVectorizedAddressing() { return true; } 267 268 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 269 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { 270 // Guess that all legal addressing mode are free. 271 if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, 272 Scale, AddrSpace)) 273 return 0; 274 return -1; 275 } 276 277 bool LSRWithInstrQueries() { return false; } 278 279 bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; } 280 281 bool isProfitableToHoist(Instruction *I) { return true; } 282 283 bool useAA() { return false; } 284 285 bool isTypeLegal(Type *Ty) { return false; } 286 287 unsigned getJumpBufAlignment() { return 0; } 288 289 unsigned getJumpBufSize() { return 0; } 290 291 bool shouldBuildLookupTables() { return true; } 292 bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } 293 294 bool useColdCCForColdCall(Function &F) { return false; } 295 296 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { 297 return 0; 298 } 299 300 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, 301 unsigned VF) { return 0; } 302 303 bool supportsEfficientVectorElementLoadStore() { return false; } 304 305 bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; } 306 307 const TTI::MemCmpExpansionOptions *enableMemCmpExpansion( 308 bool IsZeroCmp) const { 309 return nullptr; 310 } 311 312 bool enableInterleavedAccessVectorization() { return false; } 313 314 bool isFPVectorizationPotentiallyUnsafe() { return false; } 315 316 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, 317 unsigned BitWidth, 318 unsigned AddressSpace, 319 unsigned Alignment, 320 bool *Fast) { return false; } 321 322 TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) { 323 return TTI::PSK_Software; 324 } 325 326 bool haveFastSqrt(Type *Ty) { return false; } 327 328 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; } 329 330 unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; } 331 332 int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, 333 Type *Ty) { 334 return 0; 335 } 336 337 unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; } 338 339 unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, 340 Type *Ty) { 341 return TTI::TCC_Free; 342 } 343 344 unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, 345 Type *Ty) { 346 return TTI::TCC_Free; 347 } 348 349 unsigned getNumberOfRegisters(bool Vector) { return 8; } 350 351 unsigned getRegisterBitWidth(bool Vector) const { return 32; } 352 353 unsigned getMinVectorRegisterBitWidth() { return 128; } 354 355 bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; } 356 357 unsigned getMinimumVF(unsigned ElemWidth) const { return 0; } 358 359 bool 360 shouldConsiderAddressTypePromotion(const Instruction &I, 361 bool &AllowPromotionWithoutCommonHeader) { 362 AllowPromotionWithoutCommonHeader = false; 363 return false; 364 } 365 366 unsigned getCacheLineSize() { return 0; } 367 368 llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) { 369 switch (Level) { 370 case TargetTransformInfo::CacheLevel::L1D: 371 LLVM_FALLTHROUGH; 372 case TargetTransformInfo::CacheLevel::L2D: 373 return llvm::Optional<unsigned>(); 374 } 375 376 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); 377 } 378 379 llvm::Optional<unsigned> getCacheAssociativity( 380 TargetTransformInfo::CacheLevel Level) { 381 switch (Level) { 382 case TargetTransformInfo::CacheLevel::L1D: 383 LLVM_FALLTHROUGH; 384 case TargetTransformInfo::CacheLevel::L2D: 385 return llvm::Optional<unsigned>(); 386 } 387 388 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); 389 } 390 391 unsigned getPrefetchDistance() { return 0; } 392 393 unsigned getMinPrefetchStride() { return 1; } 394 395 unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; } 396 397 unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } 398 399 unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, 400 TTI::OperandValueKind Opd1Info, 401 TTI::OperandValueKind Opd2Info, 402 TTI::OperandValueProperties Opd1PropInfo, 403 TTI::OperandValueProperties Opd2PropInfo, 404 ArrayRef<const Value *> Args) { 405 return 1; 406 } 407 408 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index, 409 Type *SubTp) { 410 return 1; 411 } 412 413 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, 414 const Instruction *I) { return 1; } 415 416 unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, 417 VectorType *VecTy, unsigned Index) { 418 return 1; 419 } 420 421 unsigned getCFInstrCost(unsigned Opcode) { return 1; } 422 423 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, 424 const Instruction *I) { 425 return 1; 426 } 427 428 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { 429 return 1; 430 } 431 432 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 433 unsigned AddressSpace, const Instruction *I) { 434 return 1; 435 } 436 437 unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 438 unsigned AddressSpace) { 439 return 1; 440 } 441 442 unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, 443 bool VariableMask, 444 unsigned Alignment) { 445 return 1; 446 } 447 448 unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, 449 unsigned Factor, 450 ArrayRef<unsigned> Indices, 451 unsigned Alignment, 452 unsigned AddressSpace) { 453 return 1; 454 } 455 456 unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, 457 ArrayRef<Type *> Tys, FastMathFlags FMF, 458 unsigned ScalarizationCostPassed) { 459 return 1; 460 } 461 unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, 462 ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) { 463 return 1; 464 } 465 466 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { 467 return 1; 468 } 469 470 unsigned getNumberOfParts(Type *Tp) { return 0; } 471 472 unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *, 473 const SCEV *) { 474 return 0; 475 } 476 477 unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; } 478 479 unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; } 480 481 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; } 482 483 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) { 484 return false; 485 } 486 487 unsigned getAtomicMemIntrinsicMaxElementSize() const { 488 // Note for overrides: You must ensure for all element unordered-atomic 489 // memory intrinsics that all power-of-2 element sizes up to, and 490 // including, the return value of this method have a corresponding 491 // runtime lib call. These runtime lib call definitions can be found 492 // in RuntimeLibcalls.h 493 return 0; 494 } 495 496 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, 497 Type *ExpectedType) { 498 return nullptr; 499 } 500 501 Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, 502 unsigned SrcAlign, unsigned DestAlign) const { 503 return Type::getInt8Ty(Context); 504 } 505 506 void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut, 507 LLVMContext &Context, 508 unsigned RemainingBytes, 509 unsigned SrcAlign, 510 unsigned DestAlign) const { 511 for (unsigned i = 0; i != RemainingBytes; ++i) 512 OpsOut.push_back(Type::getInt8Ty(Context)); 513 } 514 515 bool areInlineCompatible(const Function *Caller, 516 const Function *Callee) const { 517 return (Caller->getFnAttribute("target-cpu") == 518 Callee->getFnAttribute("target-cpu")) && 519 (Caller->getFnAttribute("target-features") == 520 Callee->getFnAttribute("target-features")); 521 } 522 523 bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty, 524 const DataLayout &DL) const { 525 return false; 526 } 527 528 bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty, 529 const DataLayout &DL) const { 530 return false; 531 } 532 533 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; } 534 535 bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; } 536 537 bool isLegalToVectorizeStore(StoreInst *SI) const { return true; } 538 539 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, 540 unsigned Alignment, 541 unsigned AddrSpace) const { 542 return true; 543 } 544 545 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, 546 unsigned Alignment, 547 unsigned AddrSpace) const { 548 return true; 549 } 550 551 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, 552 unsigned ChainSizeInBytes, 553 VectorType *VecTy) const { 554 return VF; 555 } 556 557 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, 558 unsigned ChainSizeInBytes, 559 VectorType *VecTy) const { 560 return VF; 561 } 562 563 bool useReductionIntrinsic(unsigned Opcode, Type *Ty, 564 TTI::ReductionFlags Flags) const { 565 return false; 566 } 567 568 bool shouldExpandReduction(const IntrinsicInst *II) const { 569 return true; 570 } 571 572 protected: 573 // Obtain the minimum required size to hold the value (without the sign) 574 // In case of a vector it returns the min required size for one element. 575 unsigned minRequiredElementSize(const Value* Val, bool &isSigned) { 576 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) { 577 const auto* VectorValue = cast<Constant>(Val); 578 579 // In case of a vector need to pick the max between the min 580 // required size for each element 581 auto *VT = cast<VectorType>(Val->getType()); 582 583 // Assume unsigned elements 584 isSigned = false; 585 586 // The max required size is the total vector width divided by num 587 // of elements in the vector 588 unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements(); 589 590 unsigned MinRequiredSize = 0; 591 for(unsigned i = 0, e = VT->getNumElements(); i < e; ++i) { 592 if (auto* IntElement = 593 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) { 594 bool signedElement = IntElement->getValue().isNegative(); 595 // Get the element min required size. 596 unsigned ElementMinRequiredSize = 597 IntElement->getValue().getMinSignedBits() - 1; 598 // In case one element is signed then all the vector is signed. 599 isSigned |= signedElement; 600 // Save the max required bit size between all the elements. 601 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize); 602 } 603 else { 604 // not an int constant element 605 return MaxRequiredSize; 606 } 607 } 608 return MinRequiredSize; 609 } 610 611 if (const auto* CI = dyn_cast<ConstantInt>(Val)) { 612 isSigned = CI->getValue().isNegative(); 613 return CI->getValue().getMinSignedBits() - 1; 614 } 615 616 if (const auto* Cast = dyn_cast<SExtInst>(Val)) { 617 isSigned = true; 618 return Cast->getSrcTy()->getScalarSizeInBits() - 1; 619 } 620 621 if (const auto* Cast = dyn_cast<ZExtInst>(Val)) { 622 isSigned = false; 623 return Cast->getSrcTy()->getScalarSizeInBits(); 624 } 625 626 isSigned = false; 627 return Val->getType()->getScalarSizeInBits(); 628 } 629 630 bool isStridedAccess(const SCEV *Ptr) { 631 return Ptr && isa<SCEVAddRecExpr>(Ptr); 632 } 633 634 const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE, 635 const SCEV *Ptr) { 636 if (!isStridedAccess(Ptr)) 637 return nullptr; 638 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr); 639 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE)); 640 } 641 642 bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, 643 int64_t MergeDistance) { 644 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr); 645 if (!Step) 646 return false; 647 APInt StrideVal = Step->getAPInt(); 648 if (StrideVal.getBitWidth() > 64) 649 return false; 650 // FIXME: Need to take absolute value for negative stride case. 651 return StrideVal.getSExtValue() < MergeDistance; 652 } 653 }; 654 655 /// CRTP base class for use as a mix-in that aids implementing 656 /// a TargetTransformInfo-compatible class. 657 template <typename T> 658 class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { 659 private: 660 typedef TargetTransformInfoImplBase BaseT; 661 662 protected: 663 explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {} 664 665 public: 666 using BaseT::getCallCost; 667 668 unsigned getCallCost(const Function *F, int NumArgs) { 669 assert(F && "A concrete function must be provided to this routine."); 670 671 if (NumArgs < 0) 672 // Set the argument number to the number of explicit arguments in the 673 // function. 674 NumArgs = F->arg_size(); 675 676 if (Intrinsic::ID IID = F->getIntrinsicID()) { 677 FunctionType *FTy = F->getFunctionType(); 678 SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end()); 679 return static_cast<T *>(this) 680 ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys); 681 } 682 683 if (!static_cast<T *>(this)->isLoweredToCall(F)) 684 return TTI::TCC_Basic; // Give a basic cost if it will be lowered 685 // directly. 686 687 return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs); 688 } 689 690 unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments) { 691 // Simply delegate to generic handling of the call. 692 // FIXME: We should use instsimplify or something else to catch calls which 693 // will constant fold with these arguments. 694 return static_cast<T *>(this)->getCallCost(F, Arguments.size()); 695 } 696 697 using BaseT::getGEPCost; 698 699 int getGEPCost(Type *PointeeType, const Value *Ptr, 700 ArrayRef<const Value *> Operands) { 701 const GlobalValue *BaseGV = nullptr; 702 if (Ptr != nullptr) { 703 // TODO: will remove this when pointers have an opaque type. 704 assert(Ptr->getType()->getScalarType()->getPointerElementType() == 705 PointeeType && 706 "explicit pointee type doesn't match operand's pointee type"); 707 BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts()); 708 } 709 bool HasBaseReg = (BaseGV == nullptr); 710 711 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType()); 712 APInt BaseOffset(PtrSizeBits, 0); 713 int64_t Scale = 0; 714 715 auto GTI = gep_type_begin(PointeeType, Operands); 716 Type *TargetType = nullptr; 717 718 // Handle the case where the GEP instruction has a single operand, 719 // the basis, therefore TargetType is a nullptr. 720 if (Operands.empty()) 721 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic; 722 723 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) { 724 TargetType = GTI.getIndexedType(); 725 // We assume that the cost of Scalar GEP with constant index and the 726 // cost of Vector GEP with splat constant index are the same. 727 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I); 728 if (!ConstIdx) 729 if (auto Splat = getSplatValue(*I)) 730 ConstIdx = dyn_cast<ConstantInt>(Splat); 731 if (StructType *STy = GTI.getStructTypeOrNull()) { 732 // For structures the index is always splat or scalar constant 733 assert(ConstIdx && "Unexpected GEP index"); 734 uint64_t Field = ConstIdx->getZExtValue(); 735 BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field); 736 } else { 737 int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType()); 738 if (ConstIdx) { 739 BaseOffset += 740 ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize; 741 } else { 742 // Needs scale register. 743 if (Scale != 0) 744 // No addressing mode takes two scale registers. 745 return TTI::TCC_Basic; 746 Scale = ElementSize; 747 } 748 } 749 } 750 751 // Assumes the address space is 0 when Ptr is nullptr. 752 unsigned AS = 753 (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace()); 754 755 if (static_cast<T *>(this)->isLegalAddressingMode( 756 TargetType, const_cast<GlobalValue *>(BaseGV), 757 BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS)) 758 return TTI::TCC_Free; 759 return TTI::TCC_Basic; 760 } 761 762 using BaseT::getIntrinsicCost; 763 764 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 765 ArrayRef<const Value *> Arguments) { 766 // Delegate to the generic intrinsic handling code. This mostly provides an 767 // opportunity for targets to (for example) special case the cost of 768 // certain intrinsics based on constants used as arguments. 769 SmallVector<Type *, 8> ParamTys; 770 ParamTys.reserve(Arguments.size()); 771 for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) 772 ParamTys.push_back(Arguments[Idx]->getType()); 773 return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys); 774 } 775 776 unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) { 777 if (isa<PHINode>(U)) 778 return TTI::TCC_Free; // Model all PHI nodes as free. 779 780 // Static alloca doesn't generate target instructions. 781 if (auto *A = dyn_cast<AllocaInst>(U)) 782 if (A->isStaticAlloca()) 783 return TTI::TCC_Free; 784 785 if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) { 786 return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(), 787 GEP->getPointerOperand(), 788 Operands.drop_front()); 789 } 790 791 if (auto CS = ImmutableCallSite(U)) { 792 const Function *F = CS.getCalledFunction(); 793 if (!F) { 794 // Just use the called value type. 795 Type *FTy = CS.getCalledValue()->getType()->getPointerElementType(); 796 return static_cast<T *>(this) 797 ->getCallCost(cast<FunctionType>(FTy), CS.arg_size()); 798 } 799 800 SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end()); 801 return static_cast<T *>(this)->getCallCost(F, Arguments); 802 } 803 804 if (const CastInst *CI = dyn_cast<CastInst>(U)) { 805 // Result of a cmp instruction is often extended (to be used by other 806 // cmp instructions, logical or return instructions). These are usually 807 // nop on most sane targets. 808 if (isa<CmpInst>(CI->getOperand(0))) 809 return TTI::TCC_Free; 810 if (isa<SExtInst>(CI) || isa<ZExtInst>(CI) || isa<FPExtInst>(CI)) 811 return static_cast<T *>(this)->getExtCost(CI, Operands.back()); 812 } 813 814 return static_cast<T *>(this)->getOperationCost( 815 Operator::getOpcode(U), U->getType(), 816 U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr); 817 } 818 819 int getInstructionLatency(const Instruction *I) { 820 SmallVector<const Value *, 4> Operands(I->value_op_begin(), 821 I->value_op_end()); 822 if (getUserCost(I, Operands) == TTI::TCC_Free) 823 return 0; 824 825 if (isa<LoadInst>(I)) 826 return 4; 827 828 Type *DstTy = I->getType(); 829 830 // Usually an intrinsic is a simple instruction. 831 // A real function call is much slower. 832 if (auto *CI = dyn_cast<CallInst>(I)) { 833 const Function *F = CI->getCalledFunction(); 834 if (!F || static_cast<T *>(this)->isLoweredToCall(F)) 835 return 40; 836 // Some intrinsics return a value and a flag, we use the value type 837 // to decide its latency. 838 if (StructType* StructTy = dyn_cast<StructType>(DstTy)) 839 DstTy = StructTy->getElementType(0); 840 // Fall through to simple instructions. 841 } 842 843 if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy)) 844 DstTy = VectorTy->getElementType(); 845 if (DstTy->isFloatingPointTy()) 846 return 3; 847 848 return 1; 849 } 850 }; 851 } 852 853 #endif 854